Clean the environment.
Set locations, and the working directory.
A package-installation function.
Load those packages.
We will create a datestamp and define the Utrecht Science Park Colour Scheme.
# Function to grep data from glm()/lm()
#' Collect association statistics for the first predictor of a linear model.
#'
#' Reads row 2 of the coefficient table of `summary(fit)` (the first term
#' after the intercept) and combines it with model-level statistics.
#'
#' @param fit A fitted model whose `summary()` exposes a `coefficients` matrix,
#'   e.g. from `lm()` or `glm()`.
#' @param DATASET Label of the dataset; echoed into the result.
#' @param x_name Label of the exposure/biomarker; echoed into the result.
#' @param y Label of the trait/outcome; echoed into the result.
#' @param verbose Only the first element is used. If it is `TRUE` (the
#'   default), every collected statistic is printed with `cat()`.
#' @param AE_N Size of the reference cohort used to compute the percentage of
#'   missing data. Defaults to the global `AEDB.CEA.samplesize`.
#' @return A vector of 15 fields: DATASET, x_name, y, effect size, SE,
#'   exp(effect size), lower and upper 95% CI (exponentiated), t-value,
#'   p-value, R^2, adjusted R^2, AE_N, model sample size, % missing.
#'   When the coefficient table has a single row, fields 4-15 are `NA`.
GLM.CON <- function(fit, DATASET, x_name, y, verbose = c(TRUE, FALSE),
                    AE_N = AEDB.CEA.samplesize) {
  cat("Analyzing in dataset '", DATASET ,"' the association of '", x_name ,"' with '", y ,"' .\n")
  fit_summary <- summary(fit)
  coefs <- fit_summary$coefficients

  # Only an intercept was estimated: return a placeholder with the same
  # number of fields (3 labels + 12 NA) as a regular result.
  if (nrow(coefs) == 1) {
    cat("Model not fitted; probably singular.\n")
    return(c(DATASET, x_name, y, rep(NA, 12)))
  }

  cat("Collecting data.\n\n")
  effectsize <- coefs[2, 1]
  SE <- coefs[2, 2]
  OReffect <- exp(effectsize)
  CI_low <- exp(effectsize - 1.96 * SE)
  CI_up <- exp(effectsize + 1.96 * SE)
  tvalue <- coefs[2, 3]
  pvalue <- coefs[2, 4]

  # summary.glm() has no (adjusted) R^2; use NA so that c() does not
  # silently drop the field and round() does not fail on NULL.
  R <- fit_summary$r.squared
  if (is.null(R)) {
    R <- NA_real_
  }
  R.adj <- fit_summary$adj.r.squared
  if (is.null(R.adj)) {
    R.adj <- NA_real_
  }

  sample_size <- nrow(model.frame(fit))
  Perc_Miss <- 100 - ((sample_size * 100) / AE_N)

  output <- c(DATASET, x_name, y, effectsize, SE, OReffect, CI_low, CI_up,
              tvalue, pvalue, R, R.adj, AE_N, sample_size, Perc_Miss)

  # The default `verbose` has length 2; `if` needs a single TRUE/FALSE.
  if (isTRUE(as.logical(verbose[1]))) {
    cat("We have collected the following and summarize it in an object:\n")
    cat("Dataset...................:", DATASET, "\n")
    cat("Score/Exposure/biomarker..:", x_name, "\n")
    cat("Trait/outcome.............:", y, "\n")
    cat("Effect size...............:", round(effectsize, 6), "\n")
    cat("Standard error............:", round(SE, 6), "\n")
    cat("Odds ratio (effect size)..:", round(OReffect, 3), "\n")
    cat("Lower 95% CI..............:", round(CI_low, 3), "\n")
    cat("Upper 95% CI..............:", round(CI_up, 3), "\n")
    cat("T-value...................:", round(tvalue, 6), "\n")
    cat("P-value...................:", signif(pvalue, 8), "\n")
    cat("R^2.......................:", round(R, 6), "\n")
    cat("Adjusted r^2..............:", round(R.adj, 6), "\n")
    cat("Sample size of AE DB......:", AE_N, "\n")
    cat("Sample size of model......:", sample_size, "\n")
    cat("Missing data %............:", round(Perc_Miss, 6), "\n")
  } else {
    cat("Collecting data in summary object.\n")
  }

  output
}
#' Collect association statistics for the first predictor of a logistic model.
#'
#' Reads row 2 of the coefficient table of `summary(fit)` (the first term
#' after the intercept) and adds three pseudo-R^2 measures computed from the
#' model deviance and null deviance.
#'
#' @param fit A fitted `glm()` object providing `deviance`, `null.deviance`
#'   and `fitted.values`.
#' @param DATASET Label of the dataset; echoed into the result.
#' @param x_name Label of the exposure/biomarker; echoed into the result.
#' @param y Label of the trait/outcome; echoed into the result.
#' @param verbose Only the first element is used. If it is `TRUE` (the
#'   default), every collected statistic is printed with `cat()`.
#' @param AE_N Size of the reference cohort used to compute the percentage of
#'   missing data. Defaults to the global `AEDB.CEA.samplesize`.
#' @return A vector of 16 fields: DATASET, x_name, y, effect size, SE, odds
#'   ratio, lower and upper 95% CI of the odds ratio, z-value, p-value,
#'   Hosmer-Lemeshow, Cox-Snell and Nagelkerke R^2, AE_N, model sample size,
#'   % missing. When the coefficient table has a single row, fields 4-16 are
#'   `NA`.
GLM.BIN <- function(fit, DATASET, x_name, y, verbose = c(TRUE, FALSE),
                    AE_N = AEDB.CEA.samplesize) {
  cat("Analyzing in dataset '", DATASET ,"' the association of '", x_name ,"' with '", y ,"' ...\n")
  coefs <- summary(fit)$coefficients

  # Only an intercept was estimated: return a placeholder with the same
  # number of fields (3 labels + 13 NA) as a regular result.
  if (nrow(coefs) == 1) {
    cat("Model not fitted; probably singular.\n")
    return(c(DATASET, x_name, y, rep(NA, 13)))
  }

  cat("Collecting data...\n")
  effectsize <- coefs[2, 1]
  SE <- coefs[2, 2]
  OReffect <- exp(effectsize)
  CI_low <- exp(effectsize - 1.96 * SE)
  CI_up <- exp(effectsize + 1.96 * SE)
  zvalue <- coefs[2, 3]
  pvalue <- coefs[2, 4]

  # Pseudo-R^2 measures derived from the (null) deviance.
  dev <- fit$deviance
  nullDev <- fit$null.deviance
  modelN <- length(fit$fitted.values)
  R.l <- 1 - dev / nullDev
  R.cs <- 1 - exp(-(nullDev - dev) / modelN)
  R.n <- R.cs / (1 - (exp(-nullDev / modelN)))

  sample_size <- nrow(model.frame(fit))
  Perc_Miss <- 100 - ((sample_size * 100) / AE_N)

  output <- c(DATASET, x_name, y, effectsize, SE, OReffect, CI_low, CI_up,
              zvalue, pvalue, R.l, R.cs, R.n, AE_N, sample_size, Perc_Miss)

  # The default `verbose` has length 2; `if` needs a single TRUE/FALSE.
  if (isTRUE(as.logical(verbose[1]))) {
    cat("We have collected the following and summarize it in an object:\n")
    cat("Dataset...................:", DATASET, "\n")
    cat("Score/Exposure/biomarker..:", x_name, "\n")
    cat("Trait/outcome.............:", y, "\n")
    cat("Effect size...............:", round(effectsize, 6), "\n")
    cat("Standard error............:", round(SE, 6), "\n")
    cat("Odds ratio (effect size)..:", round(OReffect, 3), "\n")
    cat("Lower 95% CI..............:", round(CI_low, 3), "\n")
    cat("Upper 95% CI..............:", round(CI_up, 3), "\n")
    cat("Z-value...................:", round(zvalue, 6), "\n")
    cat("P-value...................:", signif(pvalue, 8), "\n")
    cat("Hosmer and Lemeshow r^2...:", round(R.l, 6), "\n")
    cat("Cox and Snell r^2.........:", round(R.cs, 6), "\n")
    cat("Nagelkerke's pseudo r^2...:", round(R.n, 6), "\n")
    cat("Sample size of AE DB......:", AE_N, "\n")
    cat("Sample size of model......:", sample_size, "\n")
    cat("Missing data %............:", round(Perc_Miss, 6), "\n")
  } else {
    cat("Collecting data in summary object.\n")
  }

  output
}
Using a Mendelian Randomization approach, we recently examined associations between the circulating levels of 41 cytokines and growth factors and the risk of stroke in the MEGASTROKE GWAS dataset (67,000 stroke cases and 450,000 controls) and found Monocyte chemoattractant protein-1 (MCP-1) to be the cytokine showing the strongest association with stroke, particularly large artery and cardioembolic stroke (Georgakis et al., 2019a). Genetically elevated MCP-1 levels were also associated with a higher risk of coronary artery disease and myocardial infarction (Georgakis et al., 2019a). Further, in a meta-analysis of six observational, population-based longitudinal cohort studies, we recently showed that baseline levels of MCP-1 were associated with a higher risk of ischemic stroke over follow-up (Georgakis et al., 2019b). While these data suggest a central role of MCP-1 in the pathogenesis of atherosclerosis, it remains unknown whether MCP-1 levels in the blood really reflect MCP-1 activity. MCP-1 is expressed in the atherosclerotic plaque and attracts monocytes to the subendothelial space (Nelken et al., 1991; Papadopoulou et al., 2008; Takeya et al., 1993; Wilcox et al., 1994). Thus, MCP-1 levels in the plaque might more strongly reflect MCP-1 signaling. However, it remains unknown whether MCP-1 plaque levels associate with plaque vulnerability or risk of cardiovascular events.
Against this background, we now aim to use the data from the Athero-Express Biobank Study to explore the associations of MCP-1 protein levels in the atherosclerotic plaques of patients undergoing carotid endarterectomy with phenotypes of plaque vulnerability and with secondary vascular events over a follow-up of three years.
Blood
OLINK-platform
THESE DATA ARE NOT AVAILABLE YET
Plaque
Luminex-platform, measured by Luminex
MCP1 and MCP1_pg_ug_2015. We consider the latter the best possible measurement, as it was corrected for the total protein concentration of the plaque.
FACS platform
Loading Athero-Express clinical data.
# Load the Athero-Express clinical data from the SPSS export.
# library() stops with an error when haven is missing; require() would only
# return FALSE and let the script continue.
library(haven)
# AEDB <- haven::read_sav(paste0(AEDB_loc, "/2019-3NEW_AtheroExpressDatabase_ScientificAE_02072019_IC_added.sav"))
AEDB <- haven::read_sav(
  file.path(AEDB_loc, "2020_1_NEW_AtheroExpressDatabase_ScientificAE_16-03-2020.sav")
)
# Show the first rows as a quick sanity check of the import.
head(AEDB)
NA
NA
NA
We can examine the contents of the Athero-Express Biobank dataset to know what each variable is called, what class (type) it has, and what the variable description is.
There is an excellent post on this: https://www.r-bloggers.com/working-with-spss-labels-in-r/.
# Export a data dictionary of AEDB (variable types, frequencies, percentages
# and missing values) to a dated HTML file in OUT_loc.
# NOTE(review): `max.len = TRUE` is passed through unchanged; confirm that a
# logical is what sjPlot::view_df() expects for this argument.
sjPlot::view_df(
  AEDB,
  show.type = TRUE,
  show.frq = TRUE,
  show.prc = TRUE,
  show.na = TRUE,
  max.len = TRUE,
  wrap.labels = 20,
  verbose = FALSE,
  use.viewer = FALSE,
  file = paste0(OUT_loc, "/", Today, ".AEDB.dictionary.html")
)
We need to be very strict in defining symptoms. Therefore we will fix a new variable that groups symptoms at inclusion.
Coding of symptoms is as follows:
We will group as follows in Symptoms.5G:
We will also group as follows in AsymptSympt:
We will also group as follows in AsymptSympt2G:
# Fix symptoms: derive three grouped symptom variables from the coded `sympt`.
# Columns are addressed through AEDB$ directly instead of attach(), so a
# same-named object in the workspace can never mask the data.

# Recode missing `sympt` to the sentinel -999 so the comparisons below never
# evaluate to NA.
AEDB$sympt[is.na(AEDB$sympt)] <- -999

# Symptoms.5G
AEDB[, "Symptoms.5G"] <- NA
# AEDB$Symptoms.5G[sympt == "NA"] <- "Asymptomatic"
AEDB$Symptoms.5G[AEDB$sympt == -999] <- NA
AEDB$Symptoms.5G[AEDB$sympt == 0] <- "Asymptomatic"
AEDB$Symptoms.5G[AEDB$sympt == 1 | AEDB$sympt == 7 | AEDB$sympt == 13] <- "TIA"
AEDB$Symptoms.5G[AEDB$sympt == 2 | AEDB$sympt == 3] <- "Stroke"
AEDB$Symptoms.5G[AEDB$sympt == 4 | AEDB$sympt == 14 | AEDB$sympt == 15] <- "Ocular"
AEDB$Symptoms.5G[AEDB$sympt == 8 | AEDB$sympt == 11] <- "Retinal infarction"
AEDB$Symptoms.5G[AEDB$sympt == 5 | AEDB$sympt == 9 | AEDB$sympt == 10 |
                   AEDB$sympt == 12 | AEDB$sympt == 16 | AEDB$sympt == 17] <- "Other"

# AsymptSympt
AEDB[, "AsymptSympt"] <- NA
AEDB$AsymptSympt[AEDB$sympt == -999] <- NA
AEDB$AsymptSympt[AEDB$sympt == 0] <- "Asymptomatic"
AEDB$AsymptSympt[AEDB$sympt == 1 | AEDB$sympt == 7 | AEDB$sympt == 13 |
                   AEDB$sympt == 2 | AEDB$sympt == 3] <- "Symptomatic"
AEDB$AsymptSympt[AEDB$sympt == 4 | AEDB$sympt == 14 | AEDB$sympt == 15 |
                   AEDB$sympt == 8 | AEDB$sympt == 11 | AEDB$sympt == 5 |
                   AEDB$sympt == 9 | AEDB$sympt == 10 | AEDB$sympt == 12 |
                   AEDB$sympt == 16 | AEDB$sympt == 17] <- "Ocular and others"

# AsymptSympt2G
AEDB[, "AsymptSympt2G"] <- NA
AEDB$AsymptSympt2G[AEDB$sympt == -999] <- NA
AEDB$AsymptSympt2G[AEDB$sympt == 0] <- "Asymptomatic"
AEDB$AsymptSympt2G[AEDB$sympt == 1 | AEDB$sympt == 7 | AEDB$sympt == 13 |
                     AEDB$sympt == 2 | AEDB$sympt == 3 | AEDB$sympt == 4 |
                     AEDB$sympt == 14 | AEDB$sympt == 15 | AEDB$sympt == 8 |
                     AEDB$sympt == 11 | AEDB$sympt == 5 | AEDB$sympt == 9 |
                     AEDB$sympt == 10 | AEDB$sympt == 12 | AEDB$sympt == 16 |
                     AEDB$sympt == 17] <- "Symptomatic"

# table(AEDB$sympt, useNA = "ifany")
# table(AEDB$AsymptSympt2G, useNA = "ifany")
# table(AEDB$Symptoms.5G, useNA = "ifany")
#
# table(AEDB$AsymptSympt2G, AEDB$sympt, useNA = "ifany")
# table(AEDB$Symptoms.5G, AEDB$sympt, useNA = "ifany")
# Cross-tabulate the two-group against the five-group coding as a check.
table(AEDB$AsymptSympt2G, AEDB$Symptoms.5G, useNA = "ifany")
Asymptomatic Ocular Other Retinal infarction Stroke TIA <NA>
Asymptomatic 333 0 0 0 0 0 0
Symptomatic 0 416 119 43 732 1045 0
<NA> 0 0 0 0 0 0 1103
# AEDB.temp <- subset(AEDB, select = c("STUDY_NUMBER", "UPID", "Age", "Gender", "Hospital", "Artery_summary", "sympt", "Symptoms.5G", "AsymptSympt"))
# require(labelled)
# AEDB.temp$Gender <- to_factor(AEDB.temp$Gender)
# AEDB.temp$Hospital <- to_factor(AEDB.temp$Hospital)
# AEDB.temp$Artery_summary <- to_factor(AEDB.temp$Artery_summary)
#
# DT::datatable(AEDB.temp[1:10,], caption = "Excerpt of the whole AEDB.", rownames = FALSE)
#
# table(AEDB.temp$Symptoms.5G, AEDB.temp$AsymptSympt)
#
# rm(AEDB.temp)
We will also fix the plaquephenotypes variable.
Coding of plaque phenotypes is as follows:
# Fix plaquephenotypes: translate the numeric `plaquephenotype` code into a
# readable label. Columns are addressed through AEDB$ instead of attach().
AEDB[, "OverallPlaquePhenotype"] <- NA
AEDB$OverallPlaquePhenotype[AEDB$plaquephenotype == -999] <- NA
AEDB$OverallPlaquePhenotype[AEDB$plaquephenotype == 1] <- "fibrous"
AEDB$OverallPlaquePhenotype[AEDB$plaquephenotype == 2] <- "fibroatheromatous"
AEDB$OverallPlaquePhenotype[AEDB$plaquephenotype == 3] <- "atheromatous"

table(AEDB$OverallPlaquePhenotype)
atheromatous fibroatheromatous fibrous
550 841 1439
# AEDB.temp <- subset(AEDB, select = c("STUDY_NUMBER", "UPID", "Age", "Gender", "Hospital", "Artery_summary", "plaquephenotype", "OverallPlaquePhenotype"))
# require(labelled)
# AEDB.temp$Gender <- to_factor(AEDB.temp$Gender)
# AEDB.temp$Hospital <- to_factor(AEDB.temp$Hospital)
# AEDB.temp$Artery_summary <- to_factor(AEDB.temp$Artery_summary)
#
# DT::datatable(AEDB.temp[1:10,], caption = "Excerpt of the whole AEDB.", rownames = FALSE)
#
# rm(AEDB.temp)
We will also fix the diabetes status variable. We define diabetes as history of a diagnosis and/or use of glucose-lowering medications.
# Fix diabetes: translate the 0/1 `DM.composite` code into a readable label.
# Columns are addressed through AEDB$ instead of attach().
AEDB[, "DiabetesStatus"] <- NA
AEDB$DiabetesStatus[AEDB$DM.composite == -999] <- NA
AEDB$DiabetesStatus[AEDB$DM.composite == 0] <- "Control (no Diabetes Dx/Med)"
AEDB$DiabetesStatus[AEDB$DM.composite == 1] <- "Diabetes"

# Counts of the source coding, for comparison with the derived variable.
table(AEDB$DM.composite)
0 1
2764 985
table(AEDB$DiabetesStatus)
Control (no Diabetes Dx/Med) Diabetes
2764 985
# AEDB.temp <- subset(AEDB, select = c("STUDY_NUMBER", "UPID", "Age", "Gender", "Hospital", "Artery_summary", "DM.composite", "DiabetesStatus"))
# require(labelled)
# AEDB.temp$Gender <- to_factor(AEDB.temp$Gender)
# AEDB.temp$Hospital <- to_factor(AEDB.temp$Hospital)
# AEDB.temp$Artery_summary <- to_factor(AEDB.temp$Artery_summary)
# AEDB.temp$DiabetesStatus <- to_factor(AEDB.temp$DiabetesStatus)
#
# DT::datatable(AEDB.temp[1:10,], caption = "Excerpt of the whole AEDB.", rownames = FALSE)
#
# rm(AEDB.temp)
We will also fix the smoking status variable. We are interested in whether someone never, ever or is currently (at the time of inclusion) smoking. This is based on the questionnaire.
diet801: are you a smoker?
diet802: did you smoke in the past?
We already have some variables indicating smoking status:
SmokingReported: patient has reported to smoke.
SmokingYearOR: smoking in the year of surgery?
SmokerCurrent: currently smoking?
require(labelled)
# Convert the labelled smoking-related columns to factors so that they can be
# compared against their value labels further on.
AEDB <- local({
  smoking_cols <- c("diet801", "diet802", "diet805",
                    "SmokingReported", "SmokerCurrent", "SmokingYearOR")
  AEDB[smoking_cols] <- lapply(AEDB[smoking_cols], to_factor)
  AEDB
})
# table(AEDB$diet801)
# table(AEDB$diet802)
# table(AEDB$SmokingReported)
# table(AEDB$SmokerCurrent)
# table(AEDB$SmokingYearOR)
# table(AEDB$SmokingReported, AEDB$SmokerCurrent, useNA = "ifany", dnn = c("Reported smoking", "Current smoker"))
#
# table(AEDB$diet801, AEDB$diet802, useNA = "ifany", dnn = c("Smoker", "Past smoker"))
cat("\nFixing smoking status.\n")
Fixing smoking status.
# Derive a three-level smoking status from the questionnaire item `diet802`
# and `SmokerCurrent`. Later assignments override earlier ones, so the
# `SmokerCurrent`-based rules take precedence where both apply.
# Columns are addressed through AEDB$ instead of attach().
AEDB[, "SmokerStatus"] <- NA
# NOTE(review): "don't know" is coded as "Never smoked" here; confirm that
# this is intended rather than leaving it missing.
AEDB$SmokerStatus[AEDB$diet802 == "don't know"] <- "Never smoked"
AEDB$SmokerStatus[AEDB$diet802 == "I still smoke"] <- "Current smoker"
AEDB$SmokerStatus[AEDB$SmokerCurrent == "no" & AEDB$diet802 == "no"] <- "Never smoked"
AEDB$SmokerStatus[AEDB$SmokerCurrent == "no" & AEDB$diet802 == "yes"] <- "Ex-smoker"
AEDB$SmokerStatus[AEDB$SmokerCurrent == "yes"] <- "Current smoker"
AEDB$SmokerStatus[AEDB$SmokerCurrent == "no data available/missing"] <- NA
# AEDB$SmokerStatus[is.na(SmokerCurrent)] <- "Never smoked"
cat("\n* Current smoking status.\n")
* Current smoking status.
table(AEDB$SmokerCurrent,
useNA = "ifany",
dnn = c("Current smoker"))
Current smoker
no data available/missing no yes <NA>
0 2364 1308 119
cat("\n* Updated smoking status.\n")
* Updated smoking status.
table(AEDB$SmokerStatus,
useNA = "ifany",
dnn = c("Updated smoking status"))
Updated smoking status
Current smoker Ex-smoker Never smoked <NA>
1308 1814 389 280
cat("\n* Comparing to 'SmokerCurrent'.\n")
* Comparing to 'SmokerCurrent'.
table(AEDB$SmokerStatus, AEDB$SmokerCurrent,
useNA = "ifany",
dnn = c("Updated smoking status", "Current smoker"))
Current smoker
Updated smoking status no data available/missing no yes <NA>
Current smoker 0 0 1308 0
Ex-smoker 0 1814 0 0
Never smoked 0 389 0 0
<NA> 0 161 0 119
# AEDB.temp <- subset(AEDB, select = c("STUDY_NUMBER", "UPID", "Age", "Gender", "Hospital", "Artery_summary", "DM.composite", "DiabetesStatus"))
# require(labelled)
# AEDB.temp$Gender <- to_factor(AEDB.temp$Gender)
# AEDB.temp$Hospital <- to_factor(AEDB.temp$Hospital)
# AEDB.temp$Artery_summary <- to_factor(AEDB.temp$Artery_summary)
# AEDB.temp$DiabetesStatus <- to_factor(AEDB.temp$DiabetesStatus)
#
# DT::datatable(AEDB.temp[1:10,], caption = "Excerpt of the whole AEDB.", rownames = FALSE)
#
# rm(AEDB.temp)
We will also fix the alcohol status variable.
# Fix alcohol use: translate the 0/1 questionnaire item `diet810` into a
# readable label. Columns are addressed through AEDB$ instead of attach().
AEDB[, "AlcoholUse"] <- NA
AEDB$AlcoholUse[AEDB$diet810 == -999] <- NA
AEDB$AlcoholUse[AEDB$diet810 == 0] <- "No"
AEDB$AlcoholUse[AEDB$diet810 == 1] <- "Yes"

table(AEDB$AlcoholUse)
No Yes
1238 2345
# AEDB.temp <- subset(AEDB, select = c("STUDY_NUMBER", "UPID", "Age", "Gender", "Hospital", "Artery_summary", "diet810", "AlcoholUse"))
# require(labelled)
# AEDB.temp$Gender <- to_factor(AEDB.temp$Gender)
# AEDB.temp$Hospital <- to_factor(AEDB.temp$Hospital)
# AEDB.temp$Artery_summary <- to_factor(AEDB.temp$Artery_summary)
# AEDB.temp$AlcoholUse <- to_factor(AEDB.temp$AlcoholUse)
#
# DT::datatable(AEDB.temp[1:10,], caption = "Excerpt of the whole AEDB.", rownames = FALSE)
#
# rm(AEDB.temp)
We will also fix a history of CAD, stroke or peripheral intervention status variable. This will be based on CAD_history, Stroke_history, and Peripheral.interv
# Fix history of cardiovascular disease: combine `CAD_history`,
# `Stroke_history` and `Peripheral.interv` into one indicator.
# Columns are addressed through AEDB$ instead of attach().
AEDB[, "MedHx_CVD"] <- NA
# First mark "No" when at least one of the three is recorded as absent ...
AEDB$MedHx_CVD[AEDB$CAD_history == 0 | AEDB$Stroke_history == 0 |
                 AEDB$Peripheral.interv == 0] <- "No"
# ... then override with "yes" when any of the three is present.
# NOTE(review): the labels differ in case ("No" vs "yes"); kept as-is because
# the rendered tables use these exact values.
AEDB$MedHx_CVD[AEDB$CAD_history == 1 | AEDB$Stroke_history == 1 |
                 AEDB$Peripheral.interv == 1] <- "yes"
table(AEDB$CAD_history)
0 1
2430 1285
table(AEDB$Stroke_history)
0 1
2763 947
table(AEDB$Peripheral.interv)
0 1
2579 1099
table(AEDB$MedHx_CVD)
No yes
1309 2475
# AEDB.temp <- subset(AEDB, select = c("STUDY_NUMBER", "UPID", "Age", "Gender", "Hospital", "Artery_summary", "diet810", "AlcoholUse"))
# require(labelled)
# AEDB.temp$Gender <- to_factor(AEDB.temp$Gender)
# AEDB.temp$Hospital <- to_factor(AEDB.temp$Hospital)
# AEDB.temp$Artery_summary <- to_factor(AEDB.temp$Artery_summary)
# AEDB.temp$AlcoholUse <- to_factor(AEDB.temp$AlcoholUse)
#
# DT::datatable(AEDB.temp[1:10,], caption = "Excerpt of the whole AEDB.", rownames = FALSE)
#
# rm(AEDB.temp)
We are interested in the following variables at baseline.
MCP1, and MCP1_pg_ug_2015)cat("===========================================================================================\n")
===========================================================================================
cat("CREATE BASELINE TABLE\n")
CREATE BASELINE TABLE
# Variables shown in the baseline tables, in display order.
basetable_vars <- c(
  "Hospital", "ORyear",
  "Age", "Gender",
  "TC_finalCU", "LDL_finalCU", "HDL_finalCU", "TG_finalCU",
  "TC_final", "LDL_final", "HDL_final", "TG_final",
  "hsCRP_plasma",
  "systolic", "diastoli", "GFR_MDRD", "BMI",
  "KDOQI", "BMI_WHO",
  "SmokerStatus", "AlcoholUse",
  "DiabetesStatus",
  "Hypertension.selfreport", "Hypertension.selfreportdrug",
  "Hypertension.composite", "Hypertension.drugs",
  "Med.anticoagulants", "Med.all.antiplatelet", "Med.Statin.LLD",
  "Stroke_Dx", "sympt", "Symptoms.5G", "AsymptSympt", "AsymptSympt2G",
  "restenos", "stenose",
  "MedHx_CVD", "CAD_history", "PAOD", "Peripheral.interv",
  "EP_composite", "EP_composite_time",
  "macmean0", "smcmean0", "Macrophages.bin", "SMC.bin",
  "neutrophils", "Mast_cells_plaque",
  "IPH.bin", "vessel_density_averaged",
  "Calc.bin", "Collagen.bin",
  "Fat.bin_10", "Fat.bin_40", "OverallPlaquePhenotype",
  "IL6", "IL6_pg_ug_2015", "IL6R_pg_ug_2015",
  "MCP1", "MCP1_pg_ug_2015"
)

# Subset of the above that is treated as categorical.
# NOTE(review): "Hospital", "ORyear" and "MedHx_CVD" are tabulated as
# categories in the baseline tables but are not listed here, so they end up
# in `basetable_con`; confirm whether that is intended.
basetable_bin <- c(
  "Gender",
  "KDOQI", "BMI_WHO",
  "SmokerStatus", "AlcoholUse",
  "DiabetesStatus",
  "Hypertension.selfreport", "Hypertension.selfreportdrug",
  "Hypertension.composite", "Hypertension.drugs",
  "Med.anticoagulants", "Med.all.antiplatelet", "Med.Statin.LLD",
  "Stroke_Dx", "sympt", "Symptoms.5G", "AsymptSympt", "AsymptSympt2G",
  "restenos", "stenose",
  "CAD_history", "PAOD", "Peripheral.interv",
  "EP_composite", "Macrophages.bin", "SMC.bin",
  "IPH.bin",
  "Calc.bin", "Collagen.bin",
  "Fat.bin_10", "Fat.bin_40", "OverallPlaquePhenotype"
)
# basetable_bin

# Everything that is not listed as categorical.
basetable_con <- setdiff(basetable_vars, basetable_bin)
# basetable_con
Showing the baseline table of the whole Athero-Express Biobank.
# Baseline table of the whole Athero-Express Biobank.
# Reference: http://rstudio-pubs-static.s3.amazonaws.com/13321_da314633db924dc78986a850813a50d5.html
# The table is printed, and the first three columns of the printed matrix are
# kept in AEDB.tableOne.
AEDB.tableOne <- print(
  CreateTableOne(
    vars = basetable_vars,
    # factorVars = basetable_bin,
    # strata = "Symptoms.4g",
    data = AEDB,
    includeNA = TRUE
  ),
  nonnormal = NULL,
  missing = TRUE,
  quote = FALSE,
  noSpaces = FALSE,
  showAllLevels = TRUE,
  explain = TRUE,
  format = "pf",
  contDigits = 3
)[, 1:3]
level Overall Missing
n 3791
Hospital % (freq) St. Antonius, Nieuwegein 45.8 (1735) 0.0
UMC Utrecht 54.2 (2056)
ORyear % (freq) No data available/missing 0.0 ( 0) 0.0
2002 2.5 ( 94)
2003 5.4 ( 204)
2004 7.6 ( 289)
2005 8.2 ( 309)
2006 7.5 ( 285)
2007 6.2 ( 234)
2008 5.9 ( 223)
2009 7.0 ( 267)
2010 8.1 ( 307)
2011 7.1 ( 269)
2012 8.2 ( 312)
2013 6.9 ( 262)
2014 7.9 ( 299)
2015 2.1 ( 79)
2016 3.3 ( 124)
2017 2.2 ( 85)
2018 2.1 ( 80)
2019 1.8 ( 69)
Age (mean (SD)) 68.907 (9.322) 0.0
Gender % (freq) female 30.6 (1160) 0.0
male 69.4 (2631)
TC_finalCU (mean (SD)) 185.220 (81.513) 46.8
LDL_finalCU (mean (SD)) 106.483 (40.683) 54.5
HDL_finalCU (mean (SD)) 46.593 (16.730) 51.1
TG_finalCU (mean (SD)) 154.233 (99.797) 51.8
TC_final (mean (SD)) 4.797 (2.111) 46.8
LDL_final (mean (SD)) 2.758 (1.054) 54.5
HDL_final (mean (SD)) 1.207 (0.433) 51.1
TG_final (mean (SD)) 1.743 (1.128) 51.8
hsCRP_plasma (mean (SD)) 19.250 (206.888) 60.7
systolic (mean (SD)) 150.907 (25.117) 13.5
diastoli (mean (SD)) 79.934 (21.853) 13.5
GFR_MDRD (mean (SD)) 74.849 (24.745) 6.5
BMI (mean (SD)) 26.336 (4.051) 7.5
KDOQI % (freq) No data available/missing 0.0 ( 0) 6.6
Normal kidney function 22.1 ( 839)
CKD 2 (Mild) 47.2 (1788)
CKD 3 (Moderate) 21.9 ( 830)
CKD 4 (Severe) 1.4 ( 53)
CKD 5 (Failure) 0.8 ( 32)
<NA> 6.6 ( 249)
BMI_WHO % (freq) No data available/missing 0.0 ( 0) 7.5
Underweight 1.2 ( 44)
Normal 35.2 (1335)
Overweight 42.0 (1594)
Obese 14.1 ( 533)
<NA> 7.5 ( 285)
SmokerStatus % (freq) Current smoker 34.5 (1308) 7.4
Ex-smoker 47.9 (1814)
Never smoked 10.3 ( 389)
<NA> 7.4 ( 280)
AlcoholUse % (freq) No 32.7 (1238) 5.5
Yes 61.9 (2345)
<NA> 5.5 ( 208)
DiabetesStatus % (freq) Control (no Diabetes Dx/Med) 72.9 (2764) 1.1
Diabetes 26.0 ( 985)
<NA> 1.1 ( 42)
Hypertension.selfreport % (freq) No data available/missing 0.0 ( 0) 4.0
no 23.7 ( 899)
yes 72.3 (2741)
<NA> 4.0 ( 151)
Hypertension.selfreportdrug % (freq) No data available/missing 0.0 ( 0) 5.5
no 28.6 (1085)
yes 65.9 (2499)
<NA> 5.5 ( 207)
Hypertension.composite % (freq) No data available/missing 0.0 ( 0) 1.3
no 13.3 ( 504)
yes 85.4 (3239)
<NA> 1.3 ( 48)
Hypertension.drugs % (freq) No data available/missing 0.0 ( 0) 1.5
no 21.0 ( 797)
yes 77.5 (2939)
<NA> 1.5 ( 55)
Med.anticoagulants % (freq) No data available/missing 0.0 ( 0) 1.6
no 85.6 (3246)
yes 12.8 ( 485)
<NA> 1.6 ( 60)
Med.all.antiplatelet % (freq) No data available/missing 0.0 ( 0) 1.6
no 13.7 ( 521)
yes 84.7 (3211)
<NA> 1.6 ( 59)
Med.Statin.LLD % (freq) No data available/missing 0.0 ( 0) 1.5
no 21.8 ( 826)
yes 76.7 (2909)
<NA> 1.5 ( 56)
Stroke_Dx % (freq) Missing 0.0 ( 0) 8.1
No stroke diagnosed 74.4 (2822)
Stroke diagnosed 17.5 ( 662)
<NA> 8.1 ( 307)
sympt % (freq) missing 29.1 (1103) 0.0
Asymptomatic 8.8 ( 333)
TIA 27.4 (1040)
minor stroke 12.1 ( 458)
Major stroke 7.2 ( 274)
Amaurosis fugax 10.5 ( 398)
Four vessel disease 1.1 ( 43)
Vertebrobasilary TIA 0.1 ( 5)
Retinal infarction 1.0 ( 37)
Symptomatic, but aspecific symtoms 1.6 ( 61)
Contralateral symptomatic occlusion 0.3 ( 12)
retinal infarction 0.2 ( 6)
armclaudication due to occlusion subclavian artery, CEA needed for bypass 0.0 ( 1)
retinal infarction + TIAs 0.0 ( 0)
Ocular ischemic syndrome 0.5 ( 18)
ischemisch glaucoom 0.0 ( 0)
subclavian steal syndrome 0.1 ( 2)
TGA 0.0 ( 0)
Symptoms.5G % (freq) Asymptomatic 8.8 ( 333) 29.1
Ocular 11.0 ( 416)
Other 3.1 ( 119)
Retinal infarction 1.1 ( 43)
Stroke 19.3 ( 732)
TIA 27.6 (1045)
<NA> 29.1 (1103)
AsymptSympt % (freq) Asymptomatic 8.8 ( 333) 29.1
Ocular and others 15.2 ( 578)
Symptomatic 46.9 (1777)
<NA> 29.1 (1103)
AsymptSympt2G % (freq) Asymptomatic 8.8 ( 333) 29.1
Symptomatic 62.1 (2355)
<NA> 29.1 (1103)
restenos % (freq) missing 0.0 ( 0) 4.0
de novo 87.0 (3297)
restenosis 8.8 ( 334)
stenose bij angioseal na PTCA 0.2 ( 7)
<NA> 4.0 ( 153)
stenose % (freq) missing 0.0 ( 0) 7.0
0-49% 0.7 ( 25)
50-70% 6.8 ( 256)
70-90% 35.6 (1349)
90-99% 29.9 (1132)
100% (Occlusion) 14.8 ( 560)
NA 0.1 ( 3)
50-99% 2.6 ( 99)
70-99% 2.6 ( 100)
99 0.1 ( 2)
<NA> 7.0 ( 265)
MedHx_CVD % (freq) No 34.5 (1309) 0.2
yes 65.3 (2475)
<NA> 0.2 ( 7)
CAD_history % (freq) Missing 0.0 ( 0) 2.0
No history CAD 64.1 (2430)
History CAD 33.9 (1285)
<NA> 2.0 ( 76)
PAOD % (freq) missing/no data 0.0 ( 0) 1.6
no 55.1 (2088)
yes 43.4 (1644)
<NA> 1.6 ( 59)
Peripheral.interv % (freq) no 68.0 (2579) 3.0
yes 29.0 (1099)
<NA> 3.0 ( 113)
EP_composite % (freq) No data available. 0.0 ( 0) 7.3
No composite endpoints 60.6 (2297)
Composite endpoints 32.1 (1218)
<NA> 7.3 ( 276)
EP_composite_time (mean (SD)) 2.266 (1.203) 7.4
macmean0 (mean (SD)) 0.656 (1.154) 32.4
smcmean0 (mean (SD)) 2.291 (6.620) 32.4
Macrophages.bin % (freq) no/minor 42.3 (1602) 25.7
moderate/heavy 32.0 (1215)
<NA> 25.7 ( 974)
SMC.bin % (freq) no/minor 22.9 ( 870) 25.3
moderate/heavy 51.8 (1962)
<NA> 25.3 ( 959)
neutrophils (mean (SD)) 162.985 (490.469) 91.0
Mast_cells_plaque (mean (SD)) 165.663 (163.421) 93.0
IPH.bin % (freq) no 32.3 (1223) 24.8
yes 42.9 (1628)
<NA> 24.8 ( 940)
vessel_density_averaged (mean (SD)) 8.030 (6.348) 48.0
Calc.bin % (freq) no/minor 37.9 (1437) 24.7
moderate/heavy 37.4 (1416)
<NA> 24.7 ( 938)
Collagen.bin % (freq) no/minor 14.2 ( 540) 25.2
moderate/heavy 60.6 (2297)
<NA> 25.2 ( 954)
Fat.bin_10 % (freq) <10% 32.3 (1226) 24.7
>10% 42.9 (1628)
<NA> 24.7 ( 937)
Fat.bin_40 % (freq) <40% 60.0 (2274) 24.7
>40% 15.3 ( 580)
<NA> 24.7 ( 937)
OverallPlaquePhenotype % (freq) atheromatous 14.5 ( 550) 25.3
fibroatheromatous 22.2 ( 841)
fibrous 38.0 (1439)
<NA> 25.3 ( 961)
IL6 (mean (SD)) 94.451 (278.490) 84.5
IL6_pg_ug_2015 (mean (SD)) 0.135 (0.541) 67.2
IL6R_pg_ug_2015 (mean (SD)) 0.212 (0.251) 67.1
MCP1 (mean (SD)) 130.926 (118.422) 83.7
MCP1_pg_ug_2015 (mean (SD)) 0.596 (0.880) 65.5
Showing the baseline table of the CEA patients in the Athero-Express Biobank.
# Baseline table of the CEA patients (AEDB.CEA).
# Reference: http://rstudio-pubs-static.s3.amazonaws.com/13321_da314633db924dc78986a850813a50d5.html
# The table is printed, and the first three columns of the printed matrix are
# kept in AEDB.CEA.tableOne.
AEDB.CEA.tableOne <- print(
  CreateTableOne(
    vars = basetable_vars,
    # factorVars = basetable_bin,
    # strata = "Symptoms.4g",
    data = AEDB.CEA,
    includeNA = TRUE
  ),
  nonnormal = NULL,
  missing = TRUE,
  quote = FALSE,
  noSpaces = FALSE,
  showAllLevels = TRUE,
  explain = TRUE,
  format = "pf",
  contDigits = 3
)[, 1:3]
level Overall Missing
n 2421
Hospital % (freq) St. Antonius, Nieuwegein 39.2 ( 948) 0.0
UMC Utrecht 60.8 (1473)
ORyear % (freq) No data available/missing 0.0 ( 0) 0.0
2002 3.3 ( 81)
2003 6.5 ( 157)
2004 7.8 ( 190)
2005 7.6 ( 185)
2006 7.6 ( 183)
2007 6.3 ( 152)
2008 5.7 ( 138)
2009 7.5 ( 181)
2010 6.6 ( 159)
2011 6.7 ( 163)
2012 7.3 ( 176)
2013 6.2 ( 149)
2014 6.7 ( 163)
2015 3.1 ( 76)
2016 3.5 ( 85)
2017 2.7 ( 65)
2018 2.7 ( 66)
2019 2.1 ( 52)
Age (mean (SD)) 69.105 (9.302) 0.0
Gender % (freq) female 30.5 ( 738) 0.0
male 69.5 (1683)
TC_finalCU (mean (SD)) 184.803 (56.262) 38.0
LDL_finalCU (mean (SD)) 108.420 (41.744) 45.6
HDL_finalCU (mean (SD)) 46.435 (17.005) 41.7
TG_finalCU (mean (SD)) 151.216 (91.277) 42.8
TC_final (mean (SD)) 4.786 (1.457) 38.0
LDL_final (mean (SD)) 2.808 (1.081) 45.6
HDL_final (mean (SD)) 1.203 (0.440) 41.7
TG_final (mean (SD)) 1.709 (1.031) 42.8
hsCRP_plasma (mean (SD)) 19.914 (231.655) 53.0
systolic (mean (SD)) 152.419 (25.166) 11.3
diastoli (mean (SD)) 81.318 (25.188) 11.3
GFR_MDRD (mean (SD)) 73.121 (21.152) 5.4
BMI (mean (SD)) 26.488 (3.977) 5.9
KDOQI % (freq) No data available/missing 0.0 ( 0) 5.5
Normal kidney function 19.1 ( 462)
CKD 2 (Mild) 50.9 (1232)
CKD 3 (Moderate) 22.8 ( 553)
CKD 4 (Severe) 1.3 ( 32)
CKD 5 (Failure) 0.4 ( 10)
<NA> 5.5 ( 132)
BMI_WHO % (freq) No data available/missing 0.0 ( 0) 5.9
Underweight 1.0 ( 24)
Normal 35.1 ( 850)
Overweight 43.4 (1051)
Obese 14.5 ( 352)
<NA> 5.9 ( 144)
SmokerStatus % (freq) Current smoker 33.2 ( 803) 5.9
Ex-smoker 48.0 (1163)
Never smoked 12.9 ( 313)
<NA> 5.9 ( 142)
AlcoholUse % (freq) No 34.5 ( 835) 4.0
Yes 61.5 (1488)
<NA> 4.0 ( 98)
DiabetesStatus % (freq) Control (no Diabetes Dx/Med) 75.2 (1820) 1.1
Diabetes 23.7 ( 574)
<NA> 1.1 ( 27)
Hypertension.selfreport % (freq) No data available/missing 0.0 ( 0) 3.2
no 24.3 ( 589)
yes 72.4 (1754)
<NA> 3.2 ( 78)
Hypertension.selfreportdrug % (freq) No data available/missing 0.0 ( 0) 4.4
no 29.9 ( 725)
yes 65.6 (1589)
<NA> 4.4 ( 107)
Hypertension.composite % (freq) No data available/missing 0.0 ( 0) 1.2
no 14.6 ( 353)
yes 84.3 (2040)
<NA> 1.2 ( 28)
Hypertension.drugs % (freq) No data available/missing 0.0 ( 0) 1.4
no 23.3 ( 565)
yes 75.3 (1823)
<NA> 1.4 ( 33)
Med.anticoagulants % (freq) No data available/missing 0.0 ( 0) 1.6
no 87.3 (2114)
yes 11.1 ( 269)
<NA> 1.6 ( 38)
Med.all.antiplatelet % (freq) No data available/missing 0.0 ( 0) 1.5
no 12.2 ( 295)
yes 86.3 (2090)
<NA> 1.5 ( 36)
Med.Statin.LLD % (freq) No data available/missing 0.0 ( 0) 1.4
no 20.3 ( 491)
yes 78.3 (1896)
<NA> 1.4 ( 34)
Stroke_Dx % (freq) Missing 0.0 ( 0) 6.9
No stroke diagnosed 71.5 (1731)
Stroke diagnosed 21.6 ( 524)
<NA> 6.9 ( 166)
sympt % (freq) missing 0.0 ( 0) 0.0
Asymptomatic 11.2 ( 270)
TIA 39.7 ( 961)
minor stroke 16.8 ( 407)
Major stroke 9.8 ( 238)
Amaurosis fugax 15.7 ( 379)
Four vessel disease 1.6 ( 38)
Vertebrobasilary TIA 0.2 ( 5)
Retinal infarction 1.4 ( 34)
Symptomatic, but aspecific symtoms 2.2 ( 53)
Contralateral symptomatic occlusion 0.5 ( 11)
retinal infarction 0.2 ( 6)
armclaudication due to occlusion subclavian artery, CEA needed for bypass 0.0 ( 1)
retinal infarction + TIAs 0.0 ( 0)
Ocular ischemic syndrome 0.7 ( 16)
ischemisch glaucoom 0.0 ( 0)
subclavian steal syndrome 0.1 ( 2)
TGA 0.0 ( 0)
Symptoms.5G % (freq) Asymptomatic 11.2 ( 270) 0.0
Ocular 16.3 ( 395)
Other 4.3 ( 105)
Retinal infarction 1.7 ( 40)
Stroke 26.6 ( 645)
TIA 39.9 ( 966)
AsymptSympt % (freq) Asymptomatic 11.2 ( 270) 0.0
Ocular and others 22.3 ( 540)
Symptomatic 66.5 (1611)
AsymptSympt2G % (freq) Asymptomatic 11.2 ( 270) 0.0
Symptomatic 88.8 (2151)
restenos % (freq) missing 0.0 ( 0) 1.4
de novo 93.7 (2268)
restenosis 4.9 ( 118)
stenose bij angioseal na PTCA 0.0 ( 0)
<NA> 1.4 ( 35)
stenose % (freq) missing 0.0 ( 0) 2.0
0-49% 0.5 ( 13)
50-70% 7.8 ( 189)
70-90% 46.6 (1127)
90-99% 38.3 ( 927)
100% (Occlusion) 1.3 ( 31)
NA 0.0 ( 1)
50-99% 0.6 ( 15)
70-99% 2.8 ( 68)
99 0.1 ( 2)
<NA> 2.0 ( 48)
MedHx_CVD % (freq) No 36.8 ( 892) 0.0
yes 63.2 (1529)
CAD_history % (freq) Missing 0.0 ( 0) 1.9
No history CAD 66.8 (1618)
History CAD 31.2 ( 756)
<NA> 1.9 ( 47)
PAOD % (freq) missing/no data 0.0 ( 0) 2.0
no 77.5 (1876)
yes 20.5 ( 497)
<NA> 2.0 ( 48)
Peripheral.interv % (freq) no 77.2 (1868) 2.9
yes 19.9 ( 482)
<NA> 2.9 ( 71)
EP_composite % (freq) No data available. 0.0 ( 0) 5.0
No composite endpoints 70.6 (1709)
Composite endpoints 24.4 ( 590)
<NA> 5.0 ( 122)
EP_composite_time (mean (SD)) 2.479 (1.109) 5.2
macmean0 (mean (SD)) 0.768 (1.184) 29.7
smcmean0 (mean (SD)) 1.985 (2.381) 29.9
Macrophages.bin % (freq) no/minor 34.9 ( 846) 24.1
moderate/heavy 40.9 ( 991)
<NA> 24.1 ( 584)
SMC.bin % (freq) no/minor 24.9 ( 602) 23.8
moderate/heavy 51.3 (1242)
<NA> 23.8 ( 577)
neutrophils (mean (SD)) 147.151 (419.998) 87.4
Mast_cells_plaque (mean (SD)) 164.488 (163.771) 90.0
IPH.bin % (freq) no 30.7 ( 744) 23.5
yes 45.8 (1108)
<NA> 23.5 ( 569)
vessel_density_averaged (mean (SD)) 8.318 (6.388) 35.1
Calc.bin % (freq) no/minor 41.6 (1006) 23.4
moderate/heavy 35.1 ( 849)
<NA> 23.4 ( 566)
Collagen.bin % (freq) no/minor 15.8 ( 382) 23.6
moderate/heavy 60.6 (1467)
<NA> 23.6 ( 572)
Fat.bin_10 % (freq) <10% 22.4 ( 542) 23.3
>10% 54.3 (1314)
<NA> 23.3 ( 565)
Fat.bin_40 % (freq) <40% 56.2 (1360) 23.3
>40% 20.5 ( 496)
<NA> 23.3 ( 565)
OverallPlaquePhenotype % (freq) atheromatous 19.8 ( 480) 23.7
fibroatheromatous 27.8 ( 672)
fibrous 28.7 ( 695)
<NA> 23.7 ( 574)
IL6 (mean (SD)) 98.812 (292.457) 78.2
IL6_pg_ug_2015 (mean (SD)) 0.138 (0.556) 52.5
IL6R_pg_ug_2015 (mean (SD)) 0.212 (0.251) 52.4
MCP1 (mean (SD)) 135.763 (120.028) 76.7
MCP1_pg_ug_2015 (mean (SD)) 0.612 (0.905) 50.6
MCP1_pg_ug_2015
Showing the baseline table of the CEA patients in the Athero-Express Biobank with MCP1_pg_ug_2015.
# Keep only the CEA patients with a measured MCP1_pg_ug_2015.
AEDB.CEA.subset <- AEDB.CEA[!is.na(AEDB.CEA$MCP1_pg_ug_2015), ]

# Baseline table of that subset, stratified by AsymptSympt2G. The table is
# printed, and the first six columns of the printed matrix are kept.
AEDB.CEA.subset.AsymptSympt.tableOne <- print(
  CreateTableOne(
    vars = basetable_vars,
    # factorVars = basetable_bin,
    strata = "AsymptSympt2G",
    data = AEDB.CEA.subset,
    includeNA = TRUE
  ),
  nonnormal = NULL,
  missing = TRUE,
  quote = FALSE,
  noSpaces = FALSE,
  showAllLevels = TRUE,
  explain = TRUE,
  format = "pf",
  contDigits = 3
)[, 1:6]
Stratified by AsymptSympt2G
level Asymptomatic Symptomatic p test Missing
n 131 1065
Hospital % (freq) St. Antonius, Nieuwegein 50.4 ( 66) 46.6 ( 496) 0.464 0.0
UMC Utrecht 49.6 ( 65) 53.4 ( 569)
ORyear % (freq) No data available/missing 0.0 ( 0) 0.0 ( 0) NaN 0.0
2002 10.7 ( 14) 3.9 ( 42)
2003 7.6 ( 10) 9.4 ( 100)
2004 17.6 ( 23) 11.5 ( 123)
2005 9.9 ( 13) 11.2 ( 119)
2006 10.7 ( 14) 10.2 ( 109)
2007 11.5 ( 15) 10.5 ( 112)
2008 7.6 ( 10) 7.4 ( 79)
2009 7.6 ( 10) 8.4 ( 89)
2010 5.3 ( 7) 7.6 ( 81)
2011 6.1 ( 8) 9.5 ( 101)
2012 5.3 ( 7) 8.3 ( 88)
2013 0.0 ( 0) 2.0 ( 21)
2014 0.0 ( 0) 0.1 ( 1)
2015 0.0 ( 0) 0.0 ( 0)
2016 0.0 ( 0) 0.0 ( 0)
2017 0.0 ( 0) 0.0 ( 0)
2018 0.0 ( 0) 0.0 ( 0)
2019 0.0 ( 0) 0.0 ( 0)
Age (mean (SD)) 66.237 (9.184) 68.941 (9.119) 0.001 0.0
Gender % (freq) female 23.7 ( 31) 31.4 ( 334) 0.088 0.0
male 76.3 (100) 68.6 ( 731)
TC_finalCU (mean (SD)) 175.987 (47.184) 183.420 (48.377) 0.180 33.4
LDL_finalCU (mean (SD)) 102.781 (38.324) 109.247 (41.008) 0.191 39.6
HDL_finalCU (mean (SD)) 43.701 (14.754) 45.814 (18.526) 0.317 36.4
TG_finalCU (mean (SD)) 157.650 (89.246) 145.238 (84.872) 0.211 36.0
TC_final (mean (SD)) 4.558 (1.222) 4.751 (1.253) 0.180 33.4
LDL_final (mean (SD)) 2.662 (0.993) 2.829 (1.062) 0.191 39.6
HDL_final (mean (SD)) 1.132 (0.382) 1.187 (0.480) 0.317 36.4
TG_final (mean (SD)) 1.781 (1.008) 1.641 (0.959) 0.211 36.0
hsCRP_plasma (mean (SD)) 5.688 (19.440) 16.588 (113.882) 0.379 38.8
systolic (mean (SD)) 153.577 (24.327) 155.822 (26.180) 0.390 14.0
diastoli (mean (SD)) 80.622 (13.225) 82.894 (13.581) 0.095 14.0
GFR_MDRD (mean (SD)) 71.026 (20.424) 71.879 (20.071) 0.653 3.5
BMI (mean (SD)) 26.623 (3.391) 26.321 (3.748) 0.383 4.2
KDOQI % (freq) No data available/missing 0.0 ( 0) 0.0 ( 0) NaN 3.6
Normal kidney function 17.6 ( 23) 17.3 ( 184)
CKD 2 (Mild) 49.6 ( 65) 53.2 ( 567)
CKD 3 (Moderate) 28.2 ( 37) 24.3 ( 259)
CKD 4 (Severe) 0.0 ( 0) 1.2 ( 13)
CKD 5 (Failure) 0.8 ( 1) 0.4 ( 4)
<NA> 3.8 ( 5) 3.6 ( 38)
BMI_WHO % (freq) No data available/missing 0.0 ( 0) 0.0 ( 0) NaN 4.3
Underweight 0.8 ( 1) 0.9 ( 10)
Normal 32.8 ( 43) 35.6 ( 379)
Overweight 51.1 ( 67) 46.1 ( 491)
Obese 13.0 ( 17) 12.8 ( 136)
<NA> 2.3 ( 3) 4.6 ( 49)
SmokerStatus % (freq) Current smoker 30.5 ( 40) 36.2 ( 385) 0.077 3.8
Ex-smoker 57.3 ( 75) 45.6 ( 486)
Never smoked 9.9 ( 13) 14.3 ( 152)
<NA> 2.3 ( 3) 3.9 ( 42)
AlcoholUse % (freq) No 38.2 ( 50) 33.3 ( 355) 0.359 4.0
Yes 59.5 ( 78) 62.4 ( 665)
<NA> 2.3 ( 3) 4.2 ( 45)
DiabetesStatus % (freq) Control (no Diabetes Dx/Med) 76.3 (100) 77.4 ( 824) 0.876 0.0
Diabetes 23.7 ( 31) 22.6 ( 241)
Hypertension.selfreport % (freq) No data available/missing 0.0 ( 0) 0.0 ( 0) NaN 2.0
no 23.7 ( 31) 26.6 ( 283)
yes 75.6 ( 99) 71.3 ( 759)
<NA> 0.8 ( 1) 2.2 ( 23)
Hypertension.selfreportdrug % (freq) No data available/missing 0.0 ( 0) 0.0 ( 0) NaN 2.7
no 30.5 ( 40) 32.9 ( 350)
yes 67.9 ( 89) 64.3 ( 685)
<NA> 1.5 ( 2) 2.8 ( 30)
Hypertension.composite % (freq) No data available/missing 0.0 ( 0) 0.0 ( 0) NaN 0.0
no 9.9 ( 13) 14.3 ( 152)
yes 90.1 (118) 85.7 ( 913)
Hypertension.drugs % (freq) No data available/missing 0.0 ( 0) 0.0 ( 0) NaN 0.2
no 14.5 ( 19) 23.3 ( 248)
yes 85.5 (112) 76.5 ( 815)
<NA> 0.0 ( 0) 0.2 ( 2)
Med.anticoagulants % (freq) No data available/missing 0.0 ( 0) 0.0 ( 0) NaN 0.2
no 89.3 (117) 88.0 ( 937)
yes 10.7 ( 14) 11.8 ( 126)
<NA> 0.0 ( 0) 0.2 ( 2)
Med.all.antiplatelet % (freq) No data available/missing 0.0 ( 0) 0.0 ( 0) NaN 0.4
no 6.1 ( 8) 11.0 ( 117)
yes 93.1 (122) 88.6 ( 944)
<NA> 0.8 ( 1) 0.4 ( 4)
Med.Statin.LLD % (freq) No data available/missing 0.0 ( 0) 0.0 ( 0) NaN 0.2
no 15.3 ( 20) 22.7 ( 242)
yes 84.7 (111) 77.1 ( 821)
<NA> 0.0 ( 0) 0.2 ( 2)
Stroke_Dx % (freq) Missing 0.0 ( 0) 0.0 ( 0) NaN 5.4
No stroke diagnosed 80.2 (105) 75.2 ( 801)
Stroke diagnosed 14.5 ( 19) 19.4 ( 207)
<NA> 5.3 ( 7) 5.4 ( 57)
sympt % (freq) missing 0.0 ( 0) 0.0 ( 0) NaN 0.0
Asymptomatic 100.0 (131) 0.0 ( 0)
TIA 0.0 ( 0) 46.4 ( 494)
minor stroke 0.0 ( 0) 16.7 ( 178)
Major stroke 0.0 ( 0) 12.2 ( 130)
Amaurosis fugax 0.0 ( 0) 17.2 ( 183)
Four vessel disease 0.0 ( 0) 2.2 ( 23)
Vertebrobasilary TIA 0.0 ( 0) 0.2 ( 2)
Retinal infarction 0.0 ( 0) 1.4 ( 15)
Symptomatic, but aspecific symtoms 0.0 ( 0) 2.7 ( 29)
Contralateral symptomatic occlusion 0.0 ( 0) 0.6 ( 6)
retinal infarction 0.0 ( 0) 0.3 ( 3)
armclaudication due to occlusion subclavian artery, CEA needed for bypass 0.0 ( 0) 0.1 ( 1)
retinal infarction + TIAs 0.0 ( 0) 0.0 ( 0)
Ocular ischemic syndrome 0.0 ( 0) 0.1 ( 1)
ischemisch glaucoom 0.0 ( 0) 0.0 ( 0)
subclavian steal syndrome 0.0 ( 0) 0.0 ( 0)
TGA 0.0 ( 0) 0.0 ( 0)
Symptoms.5G % (freq) Asymptomatic 100.0 (131) 0.0 ( 0) <0.001 0.0
Ocular 0.0 ( 0) 17.3 ( 184)
Other 0.0 ( 0) 5.5 ( 59)
Retinal infarction 0.0 ( 0) 1.7 ( 18)
Stroke 0.0 ( 0) 28.9 ( 308)
TIA 0.0 ( 0) 46.6 ( 496)
AsymptSympt % (freq) Asymptomatic 100.0 (131) 0.0 ( 0) <0.001 0.0
Ocular and others 0.0 ( 0) 24.5 ( 261)
Symptomatic 0.0 ( 0) 75.5 ( 804)
AsymptSympt2G % (freq) Asymptomatic 100.0 (131) 0.0 ( 0) <0.001 0.0
Symptomatic 0.0 ( 0) 100.0 (1065)
restenos % (freq) missing 0.0 ( 0) 0.0 ( 0) NaN 2.3
de novo 93.9 (123) 94.7 (1009)
restenosis 2.3 ( 3) 3.2 ( 34)
stenose bij angioseal na PTCA 0.0 ( 0) 0.0 ( 0)
<NA> 3.8 ( 5) 2.1 ( 22)
stenose % (freq) missing 0.0 ( 0) 0.0 ( 0) NaN 3.2
0-49% 0.0 ( 0) 0.6 ( 6)
50-70% 3.1 ( 4) 6.4 ( 68)
70-90% 51.1 ( 67) 44.6 ( 475)
90-99% 41.2 ( 54) 42.7 ( 455)
100% (Occlusion) 0.0 ( 0) 0.9 ( 10)
NA 0.0 ( 0) 0.0 ( 0)
50-99% 0.8 ( 1) 0.4 ( 4)
70-99% 0.0 ( 0) 1.3 ( 14)
99 0.0 ( 0) 0.0 ( 0)
<NA> 3.8 ( 5) 3.1 ( 33)
MedHx_CVD % (freq) No 38.9 ( 51) 36.9 ( 393) 0.720 0.0
yes 61.1 ( 80) 63.1 ( 672)
CAD_history % (freq) Missing 0.0 ( 0) 0.0 ( 0) NaN 0.0
No history CAD 61.8 ( 81) 69.9 ( 744)
History CAD 38.2 ( 50) 30.1 ( 321)
PAOD % (freq) missing/no data 0.0 ( 0) 0.0 ( 0) NaN 0.0
no 74.0 ( 97) 79.5 ( 847)
yes 26.0 ( 34) 20.5 ( 218)
Peripheral.interv % (freq) no 74.0 ( 97) 82.5 ( 879) 0.042 0.3
yes 26.0 ( 34) 17.2 ( 183)
<NA> 0.0 ( 0) 0.3 ( 3)
EP_composite % (freq) No data available. 0.0 ( 0) 0.0 ( 0) NaN 0.8
No composite endpoints 67.2 ( 88) 74.3 ( 791)
Composite endpoints 32.8 ( 43) 24.9 ( 265)
<NA> 0.0 ( 0) 0.8 ( 9)
EP_composite_time (mean (SD)) 2.614 (0.931) 2.613 (1.095) 0.992 0.9
macmean0 (mean (SD)) 0.837 (1.088) 0.781 (1.231) 0.623 2.3
smcmean0 (mean (SD)) 2.152 (1.861) 1.904 (2.222) 0.223 2.7
Macrophages.bin % (freq) no/minor 48.9 ( 64) 47.4 ( 505) 0.583 1.9
moderate/heavy 50.4 ( 66) 50.5 ( 538)
<NA> 0.8 ( 1) 2.1 ( 22)
SMC.bin % (freq) no/minor 22.9 ( 30) 32.2 ( 343) 0.085 1.8
moderate/heavy 75.6 ( 99) 65.9 ( 702)
<NA> 1.5 ( 2) 1.9 ( 20)
neutrophils (mean (SD)) 157.643 (507.380) 172.872 (477.038) 0.876 81.9
Mast_cells_plaque (mean (SD)) 111.400 (112.037) 183.284 (180.156) 0.056 86.1
IPH.bin % (freq) no 41.2 ( 54) 37.9 ( 404) 0.561 1.7
yes 58.0 ( 76) 60.3 ( 642)
<NA> 0.8 ( 1) 1.8 ( 19)
vessel_density_averaged (mean (SD)) 8.608 (6.547) 8.408 (6.469) 0.750 8.7
[ reached getOption("max.print") -- omitted 21 rows ]
MCP1_pg_ug_2015 and MCP1
Showing the baseline table of the CEA patients in the Athero-Express Biobank with an available MCP1_pg_ug_2015 or MCP1 measurement (either one is sufficient).
# Keep the CEA patients with at least one of the two MCP1 measurements
# (MCP1_pg_ug_2015 or MCP1) available.
AEDB.CEA.subset.combo <- subset(
  AEDB.CEA,
  !is.na(MCP1_pg_ug_2015) | !is.na(MCP1)
)

# Baseline table of that subset, stratified by AsymptSympt2G. The printed
# table is indexed to keep its first six columns only.
AEDB.CEA.subset.combo.tableOne <- print(
  CreateTableOne(
    data = AEDB.CEA.subset.combo,
    vars = basetable_vars,
    # factorVars = basetable_bin,
    strata = "AsymptSympt2G",
    includeNA = TRUE
  ),
  nonnormal = c(),
  missing = TRUE,
  quote = FALSE,
  noSpaces = FALSE,
  showAllLevels = TRUE,
  explain = TRUE,
  format = "pf",
  contDigits = 3
)[, 1:6]
Stratified by AsymptSympt2G
level Asymptomatic Symptomatic p test Missing
n 161 1165
Hospital % (freq) St. Antonius, Nieuwegein 52.2 ( 84) 47.0 ( 547) 0.246 0.0
UMC Utrecht 47.8 ( 77) 53.0 ( 618)
ORyear % (freq) No data available/missing 0.0 ( 0) 0.0 ( 0) NaN 0.0
2002 10.6 ( 17) 4.8 ( 56)
2003 11.8 ( 19) 10.6 ( 124)
2004 19.9 ( 32) 12.2 ( 142)
2005 13.7 ( 22) 13.3 ( 155)
2006 8.7 ( 14) 10.0 ( 116)
2007 9.3 ( 15) 9.6 ( 112)
2008 6.2 ( 10) 6.8 ( 79)
2009 6.2 ( 10) 7.6 ( 89)
2010 4.3 ( 7) 7.0 ( 81)
2011 5.0 ( 8) 8.7 ( 101)
2012 4.3 ( 7) 7.6 ( 88)
2013 0.0 ( 0) 1.8 ( 21)
2014 0.0 ( 0) 0.1 ( 1)
2015 0.0 ( 0) 0.0 ( 0)
2016 0.0 ( 0) 0.0 ( 0)
2017 0.0 ( 0) 0.0 ( 0)
2018 0.0 ( 0) 0.0 ( 0)
2019 0.0 ( 0) 0.0 ( 0)
Age (mean (SD)) 65.901 (9.051) 68.788 (9.081) <0.001 0.0
Gender % (freq) female 23.0 ( 37) 30.4 ( 354) 0.066 0.0
male 77.0 (124) 69.6 ( 811)
TC_finalCU (mean (SD)) 179.199 (45.274) 183.983 (48.290) 0.331 32.7
LDL_finalCU (mean (SD)) 104.132 (37.590) 109.642 (41.227) 0.215 39.8
HDL_finalCU (mean (SD)) 44.749 (14.890) 45.808 (18.231) 0.568 36.1
TG_finalCU (mean (SD)) 158.699 (87.584) 145.942 (83.223) 0.143 35.6
TC_final (mean (SD)) 4.641 (1.173) 4.765 (1.251) 0.331 32.7
LDL_final (mean (SD)) 2.697 (0.974) 2.840 (1.068) 0.215 39.8
HDL_final (mean (SD)) 1.159 (0.386) 1.186 (0.472) 0.568 36.1
TG_final (mean (SD)) 1.793 (0.990) 1.649 (0.940) 0.143 35.6
hsCRP_plasma (mean (SD)) 6.846 (21.838) 16.213 (110.899) 0.393 40.6
systolic (mean (SD)) 152.838 (24.600) 155.742 (26.411) 0.225 13.5
diastoli (mean (SD)) 80.824 (12.855) 82.873 (13.549) 0.096 13.5
GFR_MDRD (mean (SD)) 70.440 (19.793) 71.901 (20.142) 0.396 3.5
BMI (mean (SD)) 26.626 (3.572) 26.350 (3.768) 0.389 4.4
KDOQI % (freq) No data available/missing 0.0 ( 0) 0.0 ( 0) NaN 3.5
Normal kidney function 14.9 ( 24) 17.4 ( 203)
CKD 2 (Mild) 50.9 ( 82) 53.4 ( 622)
CKD 3 (Moderate) 29.8 ( 48) 23.9 ( 279)
CKD 4 (Severe) 0.0 ( 0) 1.3 ( 15)
CKD 5 (Failure) 0.6 ( 1) 0.4 ( 5)
<NA> 3.7 ( 6) 3.5 ( 41)
BMI_WHO % (freq) No data available/missing 0.0 ( 0) 0.0 ( 0) NaN 4.6
Underweight 1.2 ( 2) 0.9 ( 11)
Normal 32.3 ( 52) 35.5 ( 414)
Overweight 49.7 ( 80) 45.6 ( 531)
Obese 13.7 ( 22) 13.1 ( 153)
<NA> 3.1 ( 5) 4.8 ( 56)
SmokerStatus % (freq) Current smoker 29.2 ( 47) 36.0 ( 419) 0.072 4.0
Ex-smoker 56.5 ( 91) 45.7 ( 532)
Never smoked 11.8 ( 19) 14.2 ( 165)
<NA> 2.5 ( 4) 4.2 ( 49)
AlcoholUse % (freq) No 38.5 ( 62) 33.6 ( 392) 0.223 3.8
Yes 59.6 ( 96) 62.2 ( 725)
<NA> 1.9 ( 3) 4.1 ( 48)
DiabetesStatus % (freq) Control (no Diabetes Dx/Med) 78.3 (126) 77.3 ( 900) 0.852 0.0
Diabetes 21.7 ( 35) 22.7 ( 265)
Hypertension.selfreport % (freq) No data available/missing 0.0 ( 0) 0.0 ( 0) NaN 1.9
no 25.5 ( 41) 26.5 ( 309)
yes 73.9 (119) 71.4 ( 832)
<NA> 0.6 ( 1) 2.1 ( 24)
Hypertension.selfreportdrug % (freq) No data available/missing 0.0 ( 0) 0.0 ( 0) NaN 2.4
no 32.3 ( 52) 32.9 ( 383)
yes 66.5 (107) 64.5 ( 752)
<NA> 1.2 ( 2) 2.6 ( 30)
Hypertension.composite % (freq) No data available/missing 0.0 ( 0) 0.0 ( 0) NaN 0.0
no 11.2 ( 18) 14.1 ( 164)
yes 88.8 (143) 85.9 (1001)
Hypertension.drugs % (freq) No data available/missing 0.0 ( 0) 0.0 ( 0) NaN 0.2
no 15.5 ( 25) 22.7 ( 265)
yes 83.9 (135) 77.1 ( 898)
<NA> 0.6 ( 1) 0.2 ( 2)
Med.anticoagulants % (freq) No data available/missing 0.0 ( 0) 0.0 ( 0) NaN 0.2
no 89.4 (144) 88.0 (1025)
yes 9.9 ( 16) 11.8 ( 138)
<NA> 0.6 ( 1) 0.2 ( 2)
Med.all.antiplatelet % (freq) No data available/missing 0.0 ( 0) 0.0 ( 0) NaN 0.5
no 6.2 ( 10) 10.8 ( 126)
yes 92.5 (149) 88.8 (1035)
<NA> 1.2 ( 2) 0.3 ( 4)
Med.Statin.LLD % (freq) No data available/missing 0.0 ( 0) 0.0 ( 0) NaN 0.2
no 17.4 ( 28) 23.2 ( 270)
yes 82.0 (132) 76.7 ( 893)
<NA> 0.6 ( 1) 0.2 ( 2)
Stroke_Dx % (freq) Missing 0.0 ( 0) 0.0 ( 0) NaN 5.5
No stroke diagnosed 80.1 (129) 75.5 ( 880)
Stroke diagnosed 13.7 ( 22) 19.1 ( 222)
<NA> 6.2 ( 10) 5.4 ( 63)
sympt % (freq) missing 0.0 ( 0) 0.0 ( 0) NaN 0.0
Asymptomatic 100.0 (161) 0.0 ( 0)
TIA 0.0 ( 0) 46.6 ( 543)
minor stroke 0.0 ( 0) 17.2 ( 200)
Major stroke 0.0 ( 0) 11.6 ( 135)
Amaurosis fugax 0.0 ( 0) 16.9 ( 197)
Four vessel disease 0.0 ( 0) 2.1 ( 25)
Vertebrobasilary TIA 0.0 ( 0) 0.2 ( 2)
Retinal infarction 0.0 ( 0) 1.4 ( 16)
Symptomatic, but aspecific symtoms 0.0 ( 0) 3.1 ( 36)
Contralateral symptomatic occlusion 0.0 ( 0) 0.5 ( 6)
retinal infarction 0.0 ( 0) 0.3 ( 3)
armclaudication due to occlusion subclavian artery, CEA needed for bypass 0.0 ( 0) 0.1 ( 1)
retinal infarction + TIAs 0.0 ( 0) 0.0 ( 0)
Ocular ischemic syndrome 0.0 ( 0) 0.1 ( 1)
ischemisch glaucoom 0.0 ( 0) 0.0 ( 0)
subclavian steal syndrome 0.0 ( 0) 0.0 ( 0)
TGA 0.0 ( 0) 0.0 ( 0)
Symptoms.5G % (freq) Asymptomatic 100.0 (161) 0.0 ( 0) <0.001 0.0
Ocular 0.0 ( 0) 17.0 ( 198)
Other 0.0 ( 0) 5.8 ( 68)
Retinal infarction 0.0 ( 0) 1.6 ( 19)
Stroke 0.0 ( 0) 28.8 ( 335)
TIA 0.0 ( 0) 46.8 ( 545)
AsymptSympt % (freq) Asymptomatic 100.0 (161) 0.0 ( 0) <0.001 0.0
Ocular and others 0.0 ( 0) 24.5 ( 285)
Symptomatic 0.0 ( 0) 75.5 ( 880)
AsymptSympt2G % (freq) Asymptomatic 100.0 (161) 0.0 ( 0) <0.001 0.0
Symptomatic 0.0 ( 0) 100.0 (1165)
restenos % (freq) missing 0.0 ( 0) 0.0 ( 0) NaN 2.0
de novo 93.2 (150) 95.0 (1107)
restenosis 3.7 ( 6) 3.1 ( 36)
stenose bij angioseal na PTCA 0.0 ( 0) 0.0 ( 0)
<NA> 3.1 ( 5) 1.9 ( 22)
stenose % (freq) missing 0.0 ( 0) 0.0 ( 0) NaN 2.9
0-49% 0.0 ( 0) 0.6 ( 7)
50-70% 2.5 ( 4) 6.2 ( 72)
70-90% 50.9 ( 82) 44.6 ( 520)
90-99% 42.9 ( 69) 43.3 ( 504)
100% (Occlusion) 0.0 ( 0) 0.9 ( 11)
NA 0.0 ( 0) 0.0 ( 0)
50-99% 0.6 ( 1) 0.3 ( 4)
70-99% 0.0 ( 0) 1.2 ( 14)
99 0.0 ( 0) 0.0 ( 0)
<NA> 3.1 ( 5) 2.8 ( 33)
MedHx_CVD % (freq) No 37.3 ( 60) 36.7 ( 428) 0.965 0.0
yes 62.7 (101) 63.3 ( 737)
CAD_history % (freq) Missing 0.0 ( 0) 0.0 ( 0) NaN 0.0
No history CAD 59.0 ( 95) 69.1 ( 805)
History CAD 41.0 ( 66) 30.9 ( 360)
PAOD % (freq) missing/no data 0.0 ( 0) 0.0 ( 0) NaN 0.0
no 73.9 (119) 79.8 ( 930)
yes 26.1 ( 42) 20.2 ( 235)
Peripheral.interv % (freq) no 72.7 (117) 83.0 ( 967) 0.004 0.2
yes 27.3 ( 44) 16.7 ( 195)
<NA> 0.0 ( 0) 0.3 ( 3)
EP_composite % (freq) No data available. 0.0 ( 0) 0.0 ( 0) NaN 0.8
No composite endpoints 68.3 (110) 73.8 ( 860)
Composite endpoints 31.7 ( 51) 25.2 ( 294)
<NA> 0.0 ( 0) 0.9 ( 11)
EP_composite_time (mean (SD)) 2.579 (0.961) 2.611 (1.130) 0.735 1.0
macmean0 (mean (SD)) 0.802 (1.072) 0.822 (1.275) 0.856 2.2
smcmean0 (mean (SD)) 2.445 (2.594) 1.923 (2.234) 0.007 2.5
Macrophages.bin % (freq) no/minor 50.3 ( 81) 45.8 ( 533) 0.309 1.8
moderate/heavy 49.1 ( 79) 52.3 ( 609)
<NA> 0.6 ( 1) 2.0 ( 23)
SMC.bin % (freq) no/minor 21.7 ( 35) 32.5 ( 379) 0.017 1.7
moderate/heavy 77.0 (124) 65.8 ( 766)
<NA> 1.2 ( 2) 1.7 ( 20)
neutrophils (mean (SD)) 133.447 (437.032) 158.140 (448.512) 0.754 80.9
Mast_cells_plaque (mean (SD)) 123.389 (135.924) 173.244 (168.601) 0.097 83.7
IPH.bin % (freq) no 39.1 ( 63) 36.3 ( 423) 0.512 1.5
yes 60.2 ( 97) 62.1 ( 723)
<NA> 0.6 ( 1) 1.6 ( 19)
vessel_density_averaged (mean (SD)) 8.837 (6.727) 8.439 (6.394) 0.480 8.0
[ reached getOption("max.print") -- omitted 21 rows ]
Showing the baseline table of the CEA patients in the Athero-Express Biobank with plasma MCP1 levels.
NOT AVAILABLE YET
# Keep the CEA patients with a non-missing MCP1_plasma value.
# NOTE(review): other chunks in this file refer to the circulating measurement
# as `MCP1`; confirm that `MCP1_plasma` is the intended column name here.
AEDB.CEA.subset.serum <- subset(AEDB.CEA, !is.na(MCP1_plasma))

# Baseline table of that subset, stratified by AsymptSympt2G. The printed
# table is indexed to keep its first six columns only.
AEDB.CEA.subset.serum.tableOne <- print(
  CreateTableOne(
    data = AEDB.CEA.subset.serum,
    vars = basetable_vars,
    # factorVars = basetable_bin,
    strata = "AsymptSympt2G",
    includeNA = TRUE
  ),
  nonnormal = c(),
  missing = TRUE,
  quote = FALSE,
  noSpaces = FALSE,
  showAllLevels = TRUE,
  explain = TRUE,
  format = "pf",
  contDigits = 3
)[, 1:6]
Showing the baseline table of the CEA patients in the Athero-Express Biobank with both plasma and plaque MCP1 levels.
NOT AVAILABLE YET
# Keep the CEA patients for whom both MCP1_pg_ug_2015 and MCP1 are available.
AEDB.CEA.subset.both <- subset(
  AEDB.CEA,
  !is.na(MCP1_pg_ug_2015) & !is.na(MCP1)
)

# Baseline table of that subset, stratified by AsymptSympt2G. The printed
# table is indexed to keep its first six columns only.
AEDB.CEA.subset.both.tableOne <- print(
  CreateTableOne(
    data = AEDB.CEA.subset.both,
    vars = basetable_vars,
    # factorVars = basetable_bin,
    strata = "AsymptSympt2G",
    includeNA = TRUE
  ),
  nonnormal = c(),
  missing = TRUE,
  quote = FALSE,
  noSpaces = FALSE,
  showAllLevels = TRUE,
  explain = TRUE,
  format = "pf",
  contDigits = 3
)[, 1:6]
Writing the baseline table to Excel format.
# Write basetable
# library() stops with an error when openxlsx is not installed; require()
# would only return FALSE and let the write.xlsx() calls fail later on.
library(openxlsx)
Loading required package: openxlsx
# Export the baseline tables to Excel workbooks in BASELINE_loc. File names
# are prefixed with the date stamp (Today) and the project name.

# Baseline table of the whole CEA cohort.
write.xlsx(
  x = AEDB.CEA.tableOne,
  file = paste0(BASELINE_loc, "/",Today,".",PROJECTNAME,".AE.BaselineTable.wholeCEA.xlsx"),
  row.names = TRUE,
  col.names = TRUE,
  sheetName = "wholeAEDB_Baseline"
)

# NOTE(review): AEDB.CEA.subset.AsymptSympt.tableOne is built from the
# MCP1_pg_ug_2015 subset, yet the file and sheet names say "wholeCEA" /
# "wholeAEDB" -- confirm that this is the intended table.
write.xlsx(
  x = AEDB.CEA.subset.AsymptSympt.tableOne,
  file = paste0(BASELINE_loc, "/",Today,".",PROJECTNAME,".AE.BaselineTable.wholeCEA.AsymptSympt.xlsx"),
  row.names = TRUE,
  col.names = TRUE,
  sheetName = "wholeAEDB_Baseline_Sympt"
)

# Baseline table of the patients with a plaque or circulating MCP1 value.
write.xlsx(
  x = AEDB.CEA.subset.combo.tableOne,
  file = paste0(BASELINE_loc, "/",Today,".",PROJECTNAME,".AE.BaselineTable.subsetCEA.xlsx"),
  row.names = TRUE,
  col.names = TRUE,
  sheetName = "subsetAEDB_Baseline"
)

# Export of the serum subset table is disabled.
# write.xlsx(file = paste0(BASELINE_loc, "/",Today,".",PROJECTNAME,".AE.BaselineTable.subsetCEAserum.AsymptSympt.xlsx"),
#            AEDB.CEA.subset.serum.tableOne,
#            row.names = TRUE,
#            col.names = TRUE,
#            sheetName = "subsetAEDB_Baseline_serum_Sympt")
Here we inspect the data and, when necessary, transform quantitative measures. We will inspect the raw values, the natural log-transformed values (after adding the smallest measured value as an offset), and the inverse-normal (rank-based) transformed values.
We will explore the plaque levels. As noted above, we will use MCP1_pg_ug_2015.
summary(AEDB.CEA$MCP1_pg_ug_2015)
Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
0.0005 0.1373 0.3401 0.6121 0.7232 10.8540 1225
do.call(rbind , by(AEDB.CEA$MCP1_pg_ug_2015, AEDB.CEA$AsymptSympt2G, summary))
Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
Asymptomatic 0.0061401090 0.09779678 0.2148984 0.4950529 0.4982360 5.761795 139
Symptomatic 0.0004584575 0.14408036 0.3510514 0.6264834 0.7412862 10.853968 1086
library(patchwork)
Attaching package: ‘patchwork’
The following object is masked from ‘package:MASS’:
area
# Histogram of the raw plaque MCP1 levels, bars filled by Gender.
p1 <- ggpubr::gghistogram(
  data = AEDB.CEA,
  x = "MCP1_pg_ug_2015",
  # y = "..count..",
  fill = "Gender",
  color = "white",
  palette = c("#1290D9", "#DB003F"),
  # add = "mean",
  # rug = TRUE,
  # add.params = list(color = "black", linetype = 2),
  title = "MCP1 plaque levels",
  xlab = "pg/ug",
  ggtheme = theme_minimal()
)
Using `bins = 30` by default. Pick better value with the argument `bins`.
min_MCP1_pg_ug_2015 <- min(AEDB.CEA$MCP1_pg_ug_2015, na.rm = TRUE)
min_MCP1_pg_ug_2015
[1] 0.0004584575
# Natural-log transform of plaque MCP1; the smallest observed value is added
# as an offset before taking the log.
AEDB.CEA$MCP1_pg_ug_2015_LN <- log(
  AEDB.CEA$MCP1_pg_ug_2015 + min_MCP1_pg_ug_2015
)

# Histogram of the log-transformed values, bars filled by Gender.
p2 <- ggpubr::gghistogram(
  data = AEDB.CEA,
  x = "MCP1_pg_ug_2015_LN",
  # y = "..count..",
  fill = "Gender",
  color = "white",
  palette = c("#1290D9", "#DB003F"),
  # add = "mean",
  # rug = TRUE,
  # add.params = list(color = "black", linetype = 2),
  # title = "MCP1 plaque levels",
  xlab = "natural log-transformed pg/ug",
  ggtheme = theme_minimal()
)
Using `bins = 30` by default. Pick better value with the argument `bins`.
# Rank-based inverse-normal transform of plaque MCP1: ranks (NA kept as NA)
# are shifted by 0.5, divided by the number of non-missing values and mapped
# through the standard normal quantile function.
AEDB.CEA$MCP1_pg_ug_2015_rank <- qnorm(
  (rank(AEDB.CEA$MCP1_pg_ug_2015, na.last = "keep") - 0.5) /
    sum(!is.na(AEDB.CEA$MCP1_pg_ug_2015))
)

# Histogram of the transformed values with the mean marked, filled by Gender.
p3 <- ggpubr::gghistogram(
  data = AEDB.CEA,
  x = "MCP1_pg_ug_2015_rank",
  # y = "..count..",
  fill = "Gender",
  color = "white",
  palette = c("#1290D9", "#DB003F"),
  add = "mean",
  # rug = TRUE,
  # add.params = list(color = "black", linetype = 2),
  title = "MCP1 plaque levels",
  xlab = "inverse-normal transformation pg/ug",
  ggtheme = theme_minimal()
)
Using `bins = 30` by default. Pick better value with the argument `bins`.
# Display the three histograms (raw, natural-log transformed and
# inverse-normal transformed plaque MCP1) one after another.
p1
p2
p3
# Disabled alternative that arranges the three panels in a single figure
# (presumably a patchwork layout -- confirm before re-enabling).
# ggpar(p1, legend = "") / ggpar(p2, legend = "") | ggpar(p3, legend = "right")
# Remove the temporary plot objects from the workspace.
rm(p1, p2, p3)
NOT AVAILABLE YET
# Exploration of the circulating (serum) MCP1 levels: summaries, raw /
# natural-log / inverse-normal histograms. Mirrors the plaque MCP1 section.

# Overall and per-symptom-group summaries of serum MCP1.
summary(AEDB.CEA$MCP1)
do.call(rbind, by(AEDB.CEA$MCP1, AEDB.CEA$AsymptSympt2G, summary))

# Plaque and serum MCP1 summaries within the serum subset.
do.call(rbind, by(AEDB.CEA.subset.serum$MCP1_pg_ug_2015, AEDB.CEA.subset.serum$AsymptSympt2G, summary))
do.call(rbind, by(AEDB.CEA.subset.serum$MCP1, AEDB.CEA.subset.serum$AsymptSympt2G, summary))

# Histogram of the raw serum levels, bars filled by Gender.
p1 <- ggpubr::gghistogram(AEDB.CEA, "MCP1",
                          # y = "..count..",
                          color = "white",
                          fill = "Gender",
                          palette = c("#1290D9", "#DB003F"),
                          # add = "mean",
                          # rug = TRUE,
                          # add.params = list(color = "black", linetype = 2),
                          title = "MCP1 serum levels",
                          xlab = "pg/mL",
                          ggtheme = theme_minimal())

# Natural-log transform; the smallest observed value is added as an offset.
min_MCP1 <- min(AEDB.CEA$MCP1, na.rm = TRUE)
min_MCP1

AEDB.CEA$MCP1_LN <- log(AEDB.CEA$MCP1 + min_MCP1)

# Axis label uses pg/mL, consistent with the raw serum histogram above
# (it previously read pg/ug, the unit used for the plaque measurement).
p2 <- ggpubr::gghistogram(AEDB.CEA, "MCP1_LN",
                          # y = "..count..",
                          color = "white",
                          fill = "Gender",
                          palette = c("#1290D9", "#DB003F"),
                          # add = "mean",
                          # rug = TRUE,
                          # add.params = list(color = "black", linetype = 2),
                          title = "MCP1 serum levels",
                          xlab = "natural log-transformed pg/mL",
                          ggtheme = theme_minimal())

# Rank-based inverse-normal transform: ranks (NA kept as NA) are shifted by
# 0.5, divided by the number of non-missing values and passed to qnorm().
AEDB.CEA$MCP1_rank <- qnorm((rank(AEDB.CEA$MCP1, na.last = "keep") - 0.5) / sum(!is.na(AEDB.CEA$MCP1)))

# Axis label uses pg/mL here as well (previously pg/ug).
p3 <- ggpubr::gghistogram(AEDB.CEA, "MCP1_rank",
                          # y = "..count..",
                          color = "white",
                          fill = "Gender",
                          palette = c("#1290D9", "#DB003F"),
                          add = "mean",
                          # rug = TRUE,
                          # add.params = list(color = "black", linetype = 2),
                          title = "MCP1 serum levels",
                          xlab = "inverse-normal transformation pg/mL",
                          ggtheme = theme_minimal())

# Display the three histograms, then remove the temporary plot objects.
p1
p2
p3
# ggpar(p1, legend = "") / ggpar(p2, legend = "") | ggpar(p3, legend = "right")
rm(p1, p2, p3)
In line with the previous work by Marios Georgakis we will apply natural log transformation on all proteins and focus the analysis on MCP1 in serum and plaque.
The analyses are focused on three elements:
Covariates (variable names): [Age], [Gender], [Hypertension.composite], [DiabetesStatus], [SmokerStatus], [LDL_final], [Med.Statin.LLD], [Med.all.antiplatelet], [GFR_MDRD], [BMI], [MedHx_CVD] (a combination of [CAD_history, Stroke_history, Peripheral.interv]), and [stenose].
We will analyze the data through four different models
In the cross-sectional analysis of plaque and serum MCP1, IL6, and IL6R levels we will focus on the following plaque vulnerability phenotypes:
Continuous traits
# macrophages
cat("Summary of data.\n")
Summary of data.
summary(AEDB.CEA$macmean0)
Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
0.0000 0.0733 0.3133 0.7676 1.0000 15.1000 720
min_macmean <- min(AEDB.CEA$macmean0, na.rm = TRUE)
cat(paste0("\nMinimum value % macrophages: ",min_macmean,".\n"))
Minimum value % macrophages: 0.
# Natural-log transform of the macrophage percentage, offset by min_macmean.
# NOTE(review): when min_macmean is 0 the offset has no effect and zero
# values become log(0) = -Inf; confirm whether a positive offset was intended.
AEDB.CEA$Macrophages_LN <- log(AEDB.CEA$macmean0 + min_macmean)

# Histogram of the log-transformed values with median line and rug,
# bars filled by Gender.
ggpubr::gghistogram(
  data = AEDB.CEA,
  x = "Macrophages_LN",
  # y = "..count..",
  fill = "Gender",
  color = "white",
  palette = c("#1290D9", "#DB003F"),
  add = "median",
  # add_density = TRUE,
  rug = TRUE,
  # add.params = list(color = "black", linetype = 2),
  title = "% macrophages",
  xlab = "natural log-transformed %",
  ggtheme = theme_minimal()
)
Using `bins = 30` by default. Pick better value with the argument `bins`.
# Rank-based inverse-normal transform of the macrophage percentage: ranks
# (NA kept as NA) minus 0.5, divided by the number of non-missing values,
# mapped through qnorm().
AEDB.CEA$Macrophages_rank <- qnorm(
  (rank(AEDB.CEA$macmean0, na.last = "keep") - 0.5) /
    sum(!is.na(AEDB.CEA$macmean0))
)

# Histogram of the transformed values with median line and rug,
# bars filled by Gender.
ggpubr::gghistogram(
  data = AEDB.CEA,
  x = "Macrophages_rank",
  # y = "..count..",
  fill = "Gender",
  color = "white",
  palette = c("#1290D9", "#DB003F"),
  add = "median",
  # add_density = TRUE,
  rug = TRUE,
  # add.params = list(color = "black", linetype = 2),
  title = "% macrophages",
  xlab = "inverse-rank normalized %",
  ggtheme = theme_minimal()
)
Using `bins = 30` by default. Pick better value with the argument `bins`.
# smooth muscle cells
cat("Summary of data.\n")
Summary of data.
# Summarise the smooth muscle cell percentage (this section previously
# summarised macmean0, the macrophage variable, by mistake).
summary(AEDB.CEA$smcmean0)
Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
0.0000 0.0733 0.3133 0.7676 1.0000 15.1000 720
min_smcmean <- min(AEDB.CEA$smcmean0, na.rm = TRUE)
cat(paste0("\nMinimum value % smooth muscle cells: ",min_smcmean,".\n"))
Minimum value % smooth muscle cells: 0.
# Natural-log transform of the smooth muscle cell percentage, offset by
# min_smcmean.
# NOTE(review): when min_smcmean is 0 the offset has no effect and zero
# values become log(0) = -Inf; confirm whether a positive offset was intended.
AEDB.CEA$SMC_LN <- log(AEDB.CEA$smcmean0 + min_smcmean)

# Histogram of the log-transformed values with median line and rug,
# bars filled by Gender.
ggpubr::gghistogram(
  data = AEDB.CEA,
  x = "SMC_LN",
  # y = "..count..",
  fill = "Gender",
  color = "white",
  palette = c("#1290D9", "#DB003F"),
  add = "median",
  # add_density = TRUE,
  rug = TRUE,
  # add.params = list(color = "black", linetype = 2),
  title = "% smooth muscle cells",
  xlab = "natural log-transformed %",
  ggtheme = theme_minimal()
)
Using `bins = 30` by default. Pick better value with the argument `bins`.
# Rank-based inverse-normal transform of the smooth muscle cell percentage:
# ranks (NA kept as NA) minus 0.5, divided by the number of non-missing
# values, mapped through qnorm().
AEDB.CEA$SMC_rank <- qnorm(
  (rank(AEDB.CEA$smcmean0, na.last = "keep") - 0.5) /
    sum(!is.na(AEDB.CEA$smcmean0))
)

# Histogram of the transformed values with median line and rug,
# bars filled by Gender.
ggpubr::gghistogram(
  data = AEDB.CEA,
  x = "SMC_rank",
  # y = "..count..",
  fill = "Gender",
  color = "white",
  palette = c("#1290D9", "#DB003F"),
  add = "median",
  # add_density = TRUE,
  rug = TRUE,
  # add.params = list(color = "black", linetype = 2),
  title = "% smooth muscle cells",
  xlab = "inverse-rank normalized %",
  ggtheme = theme_minimal()
)
Using `bins = 30` by default. Pick better value with the argument `bins`.
# vessel density
cat("Summary of data.\n")
Summary of data.
summary(AEDB.CEA$vessel_density_averaged)
Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
0.000 4.000 7.000 8.318 11.300 48.000 850
min_vesseldensity <- min(AEDB.CEA$vessel_density_averaged, na.rm = TRUE)
min_vesseldensity
[1] 0
cat(paste0("\nMinimum value number of intraplaque neovessels per 3-4 hotspots: ",min_vesseldensity,".\n"))
Minimum value number of intraplaque neovessels per 3-4 hotspots: 0.
# Natural-log transform of the averaged vessel density, offset by
# min_vesseldensity.
# NOTE(review): when min_vesseldensity is 0 the offset has no effect and zero
# values become log(0) = -Inf; confirm whether a positive offset was intended.
AEDB.CEA$VesselDensity_LN <- log(
  AEDB.CEA$vessel_density_averaged + min_vesseldensity
)

# Histogram of the log-transformed values with median line and rug,
# bars filled by Gender.
ggpubr::gghistogram(
  data = AEDB.CEA,
  x = "VesselDensity_LN",
  # y = "..count..",
  fill = "Gender",
  color = "white",
  palette = c("#1290D9", "#DB003F"),
  add = "median",
  # add_density = TRUE,
  rug = TRUE,
  # add.params = list(color = "black", linetype = 2),
  title = "number of intraplaque neovessels per 3-4 hotspots",
  xlab = "natural log-transformed number",
  ggtheme = theme_minimal()
)
Using `bins = 30` by default. Pick better value with the argument `bins`.
# Rank-based inverse-normal transform of the averaged vessel density: ranks
# (NA kept as NA) minus 0.5, divided by the number of non-missing values,
# mapped through qnorm().
AEDB.CEA$VesselDensity_rank <- qnorm(
  (rank(AEDB.CEA$vessel_density_averaged, na.last = "keep") - 0.5) /
    sum(!is.na(AEDB.CEA$vessel_density_averaged))
)

# Histogram of the transformed values with median line and rug,
# bars filled by Gender.
ggpubr::gghistogram(
  data = AEDB.CEA,
  x = "VesselDensity_rank",
  # y = "..count..",
  fill = "Gender",
  color = "white",
  palette = c("#1290D9", "#DB003F"),
  add = "median",
  # add_density = TRUE,
  rug = TRUE,
  # add.params = list(color = "black", linetype = 2),
  title = "number of intraplaque neovessels per 3-4 hotspots",
  xlab = "inverse-rank normalized number",
  ggtheme = theme_minimal()
)
Using `bins = 30` by default. Pick better value with the argument `bins`.
Binary traits
# calcification
cat("Summary of data.\n")
Summary of data.
summary(AEDB.CEA$Calc.bin)
no/minor moderate/heavy NA's
1006 849 566
contrasts(AEDB.CEA$Calc.bin)
moderate/heavy
no/minor 0
moderate/heavy 1
# Analysis copy of the binary calcification score, stored as a factor.
AEDB.CEA$CalcificationPlaque <- as.factor(AEDB.CEA$Calc.bin)

# Number of patients per Gender x calcification category (missing scores
# are excluded).
df <- AEDB.CEA %>%
  filter(!is.na(CalcificationPlaque)) %>%
  group_by(Gender, CalcificationPlaque) %>%
  summarise(counts = n())

# Bar plot of those counts, bars filled by Gender and labelled with the count.
ggpubr::ggbarplot(
  data = df,
  x = "CalcificationPlaque",
  y = "counts",
  # y = "..count..",
  fill = "Gender",
  color = "white",
  palette = c("#DB003F", "#1290D9"),
  label = TRUE,
  lab.vjust = 2,
  lab.col = "#FFFFFF",
  title = "Calcification",
  xlab = "calcification",
  ggtheme = theme_minimal()
)

# Drop the temporary count table.
rm(df)
# collagen
cat("Summary of data.\n")
Summary of data.
summary(AEDB.CEA$Collagen.bin)
no/minor moderate/heavy NA's
382 1467 572
contrasts(AEDB.CEA$Collagen.bin)
moderate/heavy
no/minor 0
moderate/heavy 1
# Analysis copy of the binary collagen score, stored as a factor.
AEDB.CEA$CollagenPlaque <- as.factor(AEDB.CEA$Collagen.bin)

# Number of patients per Gender x collagen category (missing scores are
# excluded).
df <- AEDB.CEA %>%
  filter(!is.na(CollagenPlaque)) %>%
  group_by(Gender, CollagenPlaque) %>%
  summarise(counts = n())

# Bar plot of those counts, bars filled by Gender and labelled with the count.
ggpubr::ggbarplot(
  data = df,
  x = "CollagenPlaque",
  y = "counts",
  # y = "..count..",
  fill = "Gender",
  color = "white",
  palette = c("#DB003F", "#1290D9"),
  label = TRUE,
  lab.vjust = 2,
  lab.col = "#FFFFFF",
  title = "Collagen",
  xlab = "collagen",
  ggtheme = theme_minimal()
)

# Drop the temporary count table.
rm(df)
# fat 10%
cat("Summary of data.\n")
Summary of data.
summary(AEDB.CEA$Fat.bin_10)
<10% >10% NA's
542 1314 565
contrasts(AEDB.CEA$Fat.bin_10)
>10%
<10% 0
>10% 1
# Analysis copy of the binary intraplaque fat score (10% cut-off), stored as
# a factor.
AEDB.CEA$Fat10Perc <- as.factor(AEDB.CEA$Fat.bin_10)

# Number of patients per Gender x fat category (missing scores are excluded).
df <- AEDB.CEA %>%
  filter(!is.na(Fat10Perc)) %>%
  group_by(Gender, Fat10Perc) %>%
  summarise(counts = n())

# Bar plot of those counts, bars filled by Gender and labelled with the count.
ggpubr::ggbarplot(
  data = df,
  x = "Fat10Perc",
  y = "counts",
  # y = "..count..",
  fill = "Gender",
  color = "white",
  palette = c("#DB003F", "#1290D9"),
  label = TRUE,
  lab.vjust = 2,
  lab.col = "#FFFFFF",
  title = "Intraplaque fat",
  xlab = "intraplaque fat",
  ggtheme = theme_minimal()
)

# Drop the temporary count table.
rm(df)
# IPH
cat("Summary of data.\n")
Summary of data.
summary(AEDB.CEA$IPH.bin)
no yes NA's
744 1108 569
contrasts(AEDB.CEA$IPH.bin)
yes
no 0
yes 1
# Analysis copy of the binary intraplaque hemorrhage score, stored as a
# factor.
AEDB.CEA$IPH <- as.factor(AEDB.CEA$IPH.bin)

# Number of patients per Gender x IPH category (missing scores are excluded).
df <- AEDB.CEA %>%
  filter(!is.na(IPH)) %>%
  group_by(Gender, IPH) %>%
  summarise(counts = n())

# Bar plot of those counts, bars filled by Gender and labelled with the count.
ggpubr::ggbarplot(
  data = df,
  x = "IPH",
  y = "counts",
  # y = "..count..",
  fill = "Gender",
  color = "white",
  palette = c("#DB003F", "#1290D9"),
  label = TRUE,
  lab.vjust = 2,
  lab.col = "#FFFFFF",
  title = "Intraplaque hemorrhage",
  xlab = "intraplaque hemorrhage",
  ggtheme = theme_minimal()
)

# Drop the temporary count table.
rm(df)
# Symptoms
cat("Summary of data.\n")
Summary of data.
summary(AEDB.CEA$AsymptSympt)
Asymptomatic Ocular and others Symptomatic
270 540 1611
contrasts(AEDB.CEA$AsymptSympt)
Ocular and others Symptomatic
Asymptomatic 0 0
Ocular and others 1 0
Symptomatic 0 1
# Store the symptom grouping as a factor (the column is overwritten in place,
# unlike the other traits, which get a new analysis column).
AEDB.CEA$AsymptSympt <- as.factor(AEDB.CEA$AsymptSympt)

# Number of patients per Gender x symptom group (missing values are excluded).
df <- AEDB.CEA %>%
  filter(!is.na(AsymptSympt)) %>%
  group_by(Gender, AsymptSympt) %>%
  summarise(counts = n())

# Bar plot of those counts, bars filled by Gender and labelled with the count.
ggpubr::ggbarplot(
  data = df,
  x = "AsymptSympt",
  y = "counts",
  # y = "..count..",
  fill = "Gender",
  color = "white",
  palette = c("#DB003F", "#1290D9"),
  label = TRUE,
  lab.vjust = 2,
  lab.col = "#FFFFFF",
  title = "Symptoms",
  xlab = "symptoms",
  ggtheme = theme_minimal()
)

# Drop the temporary count table.
rm(df)
In this section we make some variables to assist with analysis.
# Total number of CEA patients; GLM.CON() reads this global to compute the
# percentage of patients missing from each fitted model.
AEDB.CEA.samplesize <- nrow(AEDB.CEA)

# Exposure (protein) variables. Only the rank-normalized plaque MCP1 is
# active; the wider sets below are kept for reference.
# TRAITS.PROTEIN = c("IL6_LN", "MCP1_LN", "IL6_pg_ug_2015_LN", "IL6R_pg_ug_2015_LN", "MCP1_pg_ug_2015_LN")
# TRAITS.PROTEIN.RANK = c("IL6_rank", "MCP1_rank", "IL6_pg_ug_2015_rank", "IL6R_pg_ug_2015_rank", "MCP1_pg_ug_2015_rank")
# TRAITS.PROTEIN.RANK = c("MCP1_pg_ug_2015_rank", "MCP1_rank")
TRAITS.PROTEIN.RANK <- c("MCP1_pg_ug_2015_rank")

# Continuous plaque traits (rank-normalized versions are used).
# TRAITS.CON = c("Macrophages_LN", "SMC_LN", "VesselDensity_LN")
TRAITS.CON.RANK <- c("Macrophages_rank", "SMC_rank", "VesselDensity_rank")

# Binary plaque traits.
TRAITS.BIN <- c("CalcificationPlaque", "CollagenPlaque", "Fat10Perc", "IPH")
# "Hospital",
# "Age", "Gender",
# "TC_final", "LDL_final", "HDL_final", "TG_final",
# "systolic", "diastoli", "GFR_MDRD", "BMI",
# "KDOQI", "BMI_WHO",
# "SmokerCurrent", "eCigarettes", "ePackYearsSmoking",
# "DiabetesStatus", "Hypertension.composite",
# "Hypertension.drugs", "Med.anticoagulants", "Med.all.antiplatelet", "Med.Statin.LLD",
# "Stroke_Dx", "sympt", "Symptoms.5G", "restenos",
# "EP_composite", "EP_composite_time",
# "macmean0", "smcmean0", "Macrophages.bin", "SMC.bin",
# "neutrophils", "Mast_cells_plaque",
# "IPH.bin", "vessel_density_averaged",
# "Calc.bin", "Collagen.bin",
# "Fat.bin_10", "Fat.bin_40", "OverallPlaquePhenotype",
# "IL6_pg_ug_2015", "MCP1_pg_ug_2015",
# "QC2018_FILTER", "CHIP", "SAMPLE_TYPE",
# "CAD_history", "Stroke_history", "Peripheral.interv",
# "stenose"
# 1. Age (continuous in 1-year increment). [Age]
# 2. Sex (male vs. female). [Gender]
# 3. Presence of hypertension at baseline (defined either as history of hypertension, SBP ≥140 mm Hg, DBP ≥90 mm Hg, or prescription of antihypertensive medications). [Hypertension.composite]
# 4. Presence of diabetes mellitus at baseline (defined either as a history of diabetes, administration of glucose lowering medication, HbA1c ≥6.5%, fasting glucose ≥126 mg/dl, or random glucose levels ≥200 mg/dl). [DiabetesStatus]
# 5. Smoking (current, ex-, never). [SmokerStatus]
# 6. LDL-C levels (continuous). [LDL_final]
# 7. Use of lipid-lowering drugs. [Med.Statin.LLD]
# 8. Use of antiplatelet drugs. [Med.all.antiplatelet]
# 9. eGFR (continuous). [GFR_MDRD]
# 10. BMI (continuous). [BMI]
# 11. History of cardiovascular disease (stroke, coronary artery disease, peripheral artery disease). [MedHx_CVD] combination of: [CAD_history, Stroke_history, Peripheral.interv]
# 12. Level of stenosis (50-70% vs. 70-99%). [stenose]
# Models
# Model 1: adjusted for age and sex.
# Model 2: Model 1 plus hypertension, diabetes, smoking status, lipid-lowering
#          drugs, antiplatelet drugs, eGFR, BMI, history of CVD and level of
#          stenosis.
# NOTE(review): LDL-C is listed among the planned covariates but is not part
# of COVARIATES_M2; it only appears in the disabled COVARIATES_M3 below.
# Confirm that leaving it out of Model 2 is intended.
COVARIATES_M1 <- c("Age", "Gender")

COVARIATES_M2 <- c(
  COVARIATES_M1,
  "Hypertension.composite", "DiabetesStatus",
  "SmokerStatus",
  # "SmokerCurrent",
  "Med.Statin.LLD", "Med.all.antiplatelet",
  "GFR_MDRD", "BMI",
  # "CAD_history", "Stroke_history", "Peripheral.interv",
  "MedHx_CVD",
  "stenose"
)

# Disabled extensions of Model 2 with one additional covariate each.
# COVARIATES_M3 = c(COVARIATES_M2, "LDL_final")
# COVARIATES_M4 = c(COVARIATES_M2, "hsCRP_plasma")
# COVARIATES_M5 = c(COVARIATES_M2, "IL6_pg_ug_2015_LN")
# COVARIATES_M5rank = c(COVARIATES_M2, "IL6_pg_ug_2015_rank")
In this model we correct for Age and Gender.
Here we use the inverse-rank normalized data - visually this is more normally distributed.
Analysis of continuous/quantitative plaque traits as a function of serum/plaque MCP1 levels.
GLM.results <- data.frame(matrix(NA, ncol = 15, nrow = 0))
cat("Running linear regression...\n")
Running linear regression...
# Model 1, continuous traits: for every protein in TRAITS.PROTEIN.RANK and
# every trait in TRAITS.CON.RANK, fit a linear model adjusted for Age and
# Gender, print the fit, and append one summary row (from GLM.CON()) to
# GLM.results.
# seq_along() is used instead of 1:length() so that an empty trait vector
# results in zero iterations rather than iterating over c(1, 0).
for (protein in seq_along(TRAITS.PROTEIN.RANK)) {
  PROTEIN <- TRAITS.PROTEIN.RANK[protein]
  cat(paste0("\nAnalysis of ",PROTEIN,".\n"))
  for (trait in seq_along(TRAITS.CON.RANK)) {
    TRAIT <- TRAITS.CON.RANK[trait]
    cat(paste0("\n- processing ",TRAIT,"\n\n"))

    # Keep the protein, the trait and the Model 1 covariates; drop rows with
    # missing values and rows with infinite numeric values.
    # all_of() states explicitly that the columns are chosen by the character
    # vectors, which removes the "external vector is ambiguous" messages and
    # cannot be confused with data columns named PROTEIN or TRAIT.
    currentDF <- as.data.frame(AEDB.CEA %>%
      dplyr::select(tidyselect::all_of(c(PROTEIN, TRAIT, COVARIATES_M1))) %>%
      filter(complete.cases(.))) %>%
      filter_if(~is.numeric(.), all_vars(!is.infinite(.)))
    # for debug
    # print(DT::datatable(currentDF))
    # print(nrow(currentDF))
    # print(str(currentDF))

    ### age- and sex-adjusted linear model
    # NOTE(review): the protein is the response and the trait is the first
    # predictor; GLM.CON() reports the coefficient in row 2, i.e. the trait
    # term. Confirm that this direction matches the intended analysis.
    fit <- lm(currentDF[,PROTEIN] ~ currentDF[,TRAIT] + Age + Gender, data = currentDF)
    model_step <- stepAIC(fit, direction = "both", trace = FALSE)
    print(model_step)
    print(summary(fit))

    # Collect the model statistics as a single row and append it.
    GLM.results.TEMP <- data.frame(matrix(NA, ncol = 15, nrow = 0))
    GLM.results.TEMP[1,] <- GLM.CON(fit, "AEDB.CEA", PROTEIN, TRAIT, verbose = TRUE)
    GLM.results <- rbind(GLM.results, GLM.results.TEMP)
  }
}
Analysis of MCP1_pg_ug_2015_rank.
- processing Macrophages_rank
Note: Using an external vector in selections is ambiguous.
[34mℹ[39m Use `all_of(PROTEIN)` instead of `PROTEIN` to silence this message.
[34mℹ[39m See <https://tidyselect.r-lib.org/reference/faq-external-vector.html>.
[90mThis message is displayed once per session.[39m
Note: Using an external vector in selections is ambiguous.
[34mℹ[39m Use `all_of(TRAIT)` instead of `TRAIT` to silence this message.
[34mℹ[39m See <https://tidyselect.r-lib.org/reference/faq-external-vector.html>.
[90mThis message is displayed once per session.[39m
Note: Using an external vector in selections is ambiguous.
[34mℹ[39m Use `all_of(COVARIATES_M1)` instead of `COVARIATES_M1` to silence this message.
[34mℹ[39m See <https://tidyselect.r-lib.org/reference/faq-external-vector.html>.
[90mThis message is displayed once per session.[39m
Call:
lm(formula = currentDF[, PROTEIN] ~ currentDF[, TRAIT] + Gender,
data = currentDF)
Coefficients:
(Intercept) currentDF[, TRAIT] Gendermale
-0.08694 -0.04450 0.10580
Call:
lm(formula = currentDF[, PROTEIN] ~ currentDF[, TRAIT] + Age +
Gender, data = currentDF)
Residuals:
Min 1Q Median 3Q Max
-3.4082 -0.6842 0.0046 0.6589 3.3363
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) -0.0417401 0.2249607 -0.186 0.8528
currentDF[, TRAIT] -0.0450829 0.0294144 -1.533 0.1256
Age -0.0006612 0.0031991 -0.207 0.8363
Gendermale 0.1060371 0.0635635 1.668 0.0955 .
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 0.9995 on 1164 degrees of freedom
Multiple R-squared: 0.004088, Adjusted R-squared: 0.001522
F-statistic: 1.593 on 3 and 1164 DF, p-value: 0.1894
Analyzing in dataset ' AEDB.CEA ' the association of ' MCP1_pg_ug_2015_rank ' with ' Macrophages_rank ' .
Collecting data.
We have collected the following and summarize it in an object:
Dataset...................: AEDB.CEA
Score/Exposure/biomarker..: MCP1_pg_ug_2015_rank
Trait/outcome.............: Macrophages_rank
Effect size...............: -0.045083
Standard error............: 0.029414
Odds ratio (effect size)..: 0.956
Lower 95% CI..............: 0.902
Upper 95% CI..............: 1.013
T-value...................: -1.532681
P-value...................: 0.1256263
R^2.......................: 0.004088
Adjusted r^2..............: 0.001522
Sample size of AE DB......: 2421
Sample size of model......: 1168
Missing data %............: 51.75547
- processing SMC_rank
Call:
lm(formula = currentDF[, PROTEIN] ~ currentDF[, TRAIT], data = currentDF)
Coefficients:
(Intercept) currentDF[, TRAIT]
-0.01596 -0.11159
Call:
lm(formula = currentDF[, PROTEIN] ~ currentDF[, TRAIT] + Age +
Gender, data = currentDF)
Residuals:
Min 1Q Median 3Q Max
-3.4354 -0.6542 -0.0098 0.6350 3.3670
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 0.112040 0.228792 0.490 0.624438
currentDF[, TRAIT] -0.111735 0.030680 -3.642 0.000283 ***
Age -0.002497 0.003236 -0.772 0.440423
Gendermale 0.062484 0.063873 0.978 0.328152
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 0.9952 on 1160 degrees of freedom
Multiple R-squared: 0.01319, Adjusted R-squared: 0.01064
F-statistic: 5.169 on 3 and 1160 DF, p-value: 0.001498
Analyzing in dataset ' AEDB.CEA ' the association of ' MCP1_pg_ug_2015_rank ' with ' SMC_rank ' .
Collecting data.
We have collected the following and summarize it in an object:
Dataset...................: AEDB.CEA
Score/Exposure/biomarker..: MCP1_pg_ug_2015_rank
Trait/outcome.............: SMC_rank
Effect size...............: -0.111735
Standard error............: 0.03068
Odds ratio (effect size)..: 0.894
Lower 95% CI..............: 0.842
Upper 95% CI..............: 0.95
T-value...................: -3.641935
P-value...................: 0.000282537
R^2.......................: 0.013191
Adjusted r^2..............: 0.010639
Sample size of AE DB......: 2421
Sample size of model......: 1164
Missing data %............: 51.92069
- processing VesselDensity_rank
Call:
lm(formula = currentDF[, PROTEIN] ~ currentDF[, TRAIT] + Gender,
data = currentDF)
Coefficients:
(Intercept) currentDF[, TRAIT] Gendermale
-0.1146 -0.1291 0.1266
Call:
lm(formula = currentDF[, PROTEIN] ~ currentDF[, TRAIT] + Age +
Gender, data = currentDF)
Residuals:
Min 1Q Median 3Q Max
-3.2793 -0.6743 0.0015 0.6370 3.4081
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) -0.142513 0.233373 -0.611 0.5415
currentDF[, TRAIT] -0.129023 0.030547 -4.224 2.6e-05 ***
Age 0.000407 0.003312 0.123 0.9022
Gendermale 0.126560 0.065772 1.924 0.0546 .
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 1.002 on 1088 degrees of freedom
Multiple R-squared: 0.01916, Adjusted R-squared: 0.01645
F-statistic: 7.084 on 3 and 1088 DF, p-value: 0.0001025
Analyzing in dataset ' AEDB.CEA ' the association of ' MCP1_pg_ug_2015_rank ' with ' VesselDensity_rank ' .
Collecting data.
We have collected the following and summarize it in an object:
Dataset...................: AEDB.CEA
Score/Exposure/biomarker..: MCP1_pg_ug_2015_rank
Trait/outcome.............: VesselDensity_rank
Effect size...............: -0.129023
Standard error............: 0.030547
Odds ratio (effect size)..: 0.879
Lower 95% CI..............: 0.828
Upper 95% CI..............: 0.933
T-value...................: -4.223732
P-value...................: 2.60385e-05
R^2.......................: 0.019159
Adjusted r^2..............: 0.016454
Sample size of AE DB......: 2421
Sample size of model......: 1092
Missing data %............: 54.89467
# Progress message: the collected results table is about to get its column names.
cat("Edit the column names...\n")
Edit the column names...
# Label the 15 result columns of the linear-model table, then announce the
# type-conversion step.
names(GLM.results) <- c(
  "Dataset", "Predictor", "Trait",
  "Beta", "s.e.m.",
  "OR", "low95CI", "up95CI",
  "T-value", "P-value",
  "r^2", "r^2_adj",
  "AE_N", "Model_N", "Perc_Miss"
)
cat("Correct the variable types...\n")
Correct the variable types...
# Convert every statistic column to numeric; Dataset, Predictor and Trait are
# left as they are. local() keeps the helper names out of the workspace.
GLM.results <- local({
  converted <- GLM.results
  for (numeric_column in c("Beta", "s.e.m.", "OR", "low95CI", "up95CI",
                           "T-value", "P-value", "r^2", "r^2_adj",
                           "AE_N", "Model_N", "Perc_Miss")) {
    converted[[numeric_column]] <- as.numeric(converted[[numeric_column]])
  }
  converted
})
# Save the data
cat("Writing results to Excel-file...\n")
Writing results to Excel-file...
### Model 1, continuous traits: export the results table to a date-stamped workbook
library(openxlsx)
write.xlsx(
  GLM.results,
  sheetName = "Con.Uni.PlaquePheno",
  col.names = TRUE,
  row.names = FALSE,
  file = paste0(OUT_loc, "/",Today,".AEDB.CEA.Con.Uni.Protein.PlaquePhenotypes.RANK.MODEL1.xlsx")
)
# Removing intermediates
cat("Removing intermediate files...\n")
Removing intermediate files...
# Drop the per-analysis working objects so the next analysis starts clean.
rm(list = c("TRAIT", "trait", "currentDF", "GLM.results",
            "GLM.results.TEMP", "fit", "model_step"))
Analysis of binary plaque traits as a function of serum/plaque MCP1 levels.
# Model 1, binary plaque traits: for every protein/trait pair, fit a logistic
# regression of the trait on the protein level adjusted for Age and Gender,
# print the stepAIC-selected model and the full model summary, and append what
# GLM.BIN() returns as one row of the 16-column GLM.results table.
GLM.results <- data.frame(matrix(NA, ncol = 16, nrow = 0))
for (protein in seq_along(TRAITS.PROTEIN.RANK)) { # seq_along(): no iteration on an empty vector
  PROTEIN <- TRAITS.PROTEIN.RANK[protein]
  cat(paste0("\nAnalysis of ",PROTEIN,".\n"))
  for (trait in seq_along(TRAITS.BIN)) {
    TRAIT <- TRAITS.BIN[trait]
    cat(paste0("\n- processing ",TRAIT,"\n\n"))
    # Keep only the model variables, drop rows with missing values, then drop
    # rows holding an infinite value in any numeric column.
    # all_of() makes the selection by external character vectors explicit and
    # silences the tidyselect ambiguity note.
    currentDF <- as.data.frame(AEDB.CEA %>%
      dplyr::select(., tidyselect::all_of(c(PROTEIN, TRAIT, COVARIATES_M1))) %>%
      filter(complete.cases(.))) %>%
      filter_if(~is.numeric(.), all_vars(!is.infinite(.)))
    # for debug
    # print(DT::datatable(currentDF))
    # print(nrow(currentDF))
    # print(str(currentDF))
    # print(class(currentDF[,TRAIT]))
    ### model 1: adjusted for age and sex
    fit <- glm(as.factor(currentDF[,TRAIT]) ~ currentDF[,PROTEIN] + Age + Gender,
               data = currentDF, family = binomial(link = "logit"))
    # The stepwise-selected model is printed for reference only; the full model
    # 'fit' is the one that is summarised and stored.
    model_step <- stepAIC(fit, direction = "both", trace = FALSE)
    print(model_step)
    print(summary(fit))
    GLM.results.TEMP <- data.frame(matrix(NA, ncol = 16, nrow = 0))
    GLM.results.TEMP[1,] <- GLM.BIN(fit, "AEDB.CEA", PROTEIN, TRAIT, verbose = TRUE)
    GLM.results <- rbind(GLM.results, GLM.results.TEMP)
  }
}
Analysis of MCP1_pg_ug_2015_rank.
- processing CalcificationPlaque
Call: glm(formula = as.factor(currentDF[, TRAIT]) ~ currentDF[, PROTEIN] +
Age + Gender, family = binomial(link = "logit"), data = currentDF)
Coefficients:
(Intercept) currentDF[, PROTEIN] Age Gendermale
-0.88365 -0.44949 0.01414 -0.19877
Degrees of Freedom: 1177 Total (i.e. Null); 1174 Residual
Null Deviance: 1632
Residual Deviance: 1570 AIC: 1578
Call:
glm(formula = as.factor(currentDF[, TRAIT]) ~ currentDF[, PROTEIN] +
Age + Gender, family = binomial(link = "logit"), data = currentDF)
Deviance Residuals:
Min 1Q Median 3Q Max
-1.7790 -1.1167 -0.7616 1.1294 1.9050
Coefficients:
Estimate Std. Error z value Pr(>|z|)
(Intercept) -0.883647 0.460710 -1.918 0.0551 .
currentDF[, PROTEIN] -0.449493 0.062887 -7.148 8.83e-13 ***
Age 0.014139 0.006548 2.159 0.0308 *
Gendermale -0.198771 0.129909 -1.530 0.1260
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
(Dispersion parameter for binomial family taken to be 1)
Null deviance: 1632.5 on 1177 degrees of freedom
Residual deviance: 1570.1 on 1174 degrees of freedom
AIC: 1578.1
Number of Fisher Scoring iterations: 4
Analyzing in dataset ' AEDB.CEA ' the association of ' MCP1_pg_ug_2015_rank ' with ' CalcificationPlaque ' ...
Collecting data...
We have collected the following and summarize it in an object:
Dataset...................: AEDB.CEA
Score/Exposure/biomarker..: MCP1_pg_ug_2015_rank
Trait/outcome.............: CalcificationPlaque
Effect size...............: -0.449493
Standard error............: 0.062887
Odds ratio (effect size)..: 0.638
Lower 95% CI..............: 0.564
Upper 95% CI..............: 0.722
Z-value...................: -7.147681
P-value...................: 8.825609e-13
Hosmer and Lemeshow r^2...: 0.038193
Cox and Snell r^2.........: 0.051552
Nagelkerke's pseudo r^2...: 0.068747
Sample size of AE DB......: 2421
Sample size of model......: 1178
Missing data %............: 51.34242
- processing CollagenPlaque
Call: glm(formula = as.factor(currentDF[, TRAIT]) ~ currentDF[, PROTEIN],
family = binomial(link = "logit"), data = currentDF)
Coefficients:
(Intercept) currentDF[, PROTEIN]
1.3310 -0.2324
Degrees of Freedom: 1178 Total (i.e. Null); 1177 Residual
Null Deviance: 1216
Residual Deviance: 1205 AIC: 1209
Call:
glm(formula = as.factor(currentDF[, TRAIT]) ~ currentDF[, PROTEIN] +
Age + Gender, family = binomial(link = "logit"), data = currentDF)
Deviance Residuals:
Min 1Q Median 3Q Max
-2.1020 0.5729 0.6568 0.7158 0.9553
Coefficients:
Estimate Std. Error z value Pr(>|z|)
(Intercept) 1.143689 0.546619 2.092 0.03641 *
currentDF[, PROTEIN] -0.231697 0.072337 -3.203 0.00136 **
Age 0.002892 0.007791 0.371 0.71047
Gendermale -0.015641 0.156179 -0.100 0.92023
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
(Dispersion parameter for binomial family taken to be 1)
Null deviance: 1215.6 on 1178 degrees of freedom
Residual deviance: 1205.0 on 1175 degrees of freedom
AIC: 1213
Number of Fisher Scoring iterations: 4
Analyzing in dataset ' AEDB.CEA ' the association of ' MCP1_pg_ug_2015_rank ' with ' CollagenPlaque ' ...
Collecting data...
We have collected the following and summarize it in an object:
Dataset...................: AEDB.CEA
Score/Exposure/biomarker..: MCP1_pg_ug_2015_rank
Trait/outcome.............: CollagenPlaque
Effect size...............: -0.231697
Standard error............: 0.072337
Odds ratio (effect size)..: 0.793
Lower 95% CI..............: 0.688
Upper 95% CI..............: 0.914
Z-value...................: -3.202999
P-value...................: 0.001360044
Hosmer and Lemeshow r^2...: 0.008726
Cox and Snell r^2.........: 0.008956
Nagelkerke's pseudo r^2...: 0.013921
Sample size of AE DB......: 2421
Sample size of model......: 1179
Missing data %............: 51.30112
- processing Fat10Perc
Call: glm(formula = as.factor(currentDF[, TRAIT]) ~ currentDF[, PROTEIN] +
Age + Gender, family = binomial(link = "logit"), data = currentDF)
Coefficients:
(Intercept) currentDF[, PROTEIN] Age Gendermale
-0.32933 0.15569 0.01093 0.85630
Degrees of Freedom: 1178 Total (i.e. Null); 1175 Residual
Null Deviance: 1386
Residual Deviance: 1339 AIC: 1347
Call:
glm(formula = as.factor(currentDF[, TRAIT]) ~ currentDF[, PROTEIN] +
Age + Gender, family = binomial(link = "logit"), data = currentDF)
Deviance Residuals:
Min 1Q Median 3Q Max
-1.9399 -1.2794 0.6824 0.7695 1.2262
Coefficients:
Estimate Std. Error z value Pr(>|z|)
(Intercept) -0.329327 0.501681 -0.656 0.5115
currentDF[, PROTEIN] 0.155685 0.066807 2.330 0.0198 *
Age 0.010928 0.007185 1.521 0.1283
Gendermale 0.856297 0.137117 6.245 4.24e-10 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
(Dispersion parameter for binomial family taken to be 1)
Null deviance: 1386.5 on 1178 degrees of freedom
Residual deviance: 1338.8 on 1175 degrees of freedom
AIC: 1346.8
Number of Fisher Scoring iterations: 4
Analyzing in dataset ' AEDB.CEA ' the association of ' MCP1_pg_ug_2015_rank ' with ' Fat10Perc ' ...
Collecting data...
We have collected the following and summarize it in an object:
Dataset...................: AEDB.CEA
Score/Exposure/biomarker..: MCP1_pg_ug_2015_rank
Trait/outcome.............: Fat10Perc
Effect size...............: 0.155685
Standard error............: 0.066807
Odds ratio (effect size)..: 1.168
Lower 95% CI..............: 1.025
Upper 95% CI..............: 1.332
Z-value...................: 2.330372
P-value...................: 0.01978647
Hosmer and Lemeshow r^2...: 0.034376
Cox and Snell r^2.........: 0.039619
Nagelkerke's pseudo r^2...: 0.057296
Sample size of AE DB......: 2421
Sample size of model......: 1179
Missing data %............: 51.30112
- processing IPH
Call: glm(formula = as.factor(currentDF[, TRAIT]) ~ currentDF[, PROTEIN] +
Gender, family = binomial(link = "logit"), data = currentDF)
Coefficients:
(Intercept) currentDF[, PROTEIN] Gendermale
0.02945 -0.12895 0.61756
Degrees of Freedom: 1175 Total (i.e. Null); 1173 Residual
Null Deviance: 1572
Residual Deviance: 1546 AIC: 1552
Call:
glm(formula = as.factor(currentDF[, TRAIT]) ~ currentDF[, PROTEIN] +
Age + Gender, family = binomial(link = "logit"), data = currentDF)
Deviance Residuals:
Min 1Q Median 3Q Max
-1.650 -1.271 0.884 0.956 1.342
Coefficients:
Estimate Std. Error z value Pr(>|z|)
(Intercept) -0.157708 0.462022 -0.341 0.7328
currentDF[, PROTEIN] -0.128693 0.060765 -2.118 0.0342 *
Age 0.002737 0.006577 0.416 0.6773
Gendermale 0.616805 0.129037 4.780 1.75e-06 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
(Dispersion parameter for binomial family taken to be 1)
Null deviance: 1572.3 on 1175 degrees of freedom
Residual deviance: 1545.6 on 1172 degrees of freedom
AIC: 1553.6
Number of Fisher Scoring iterations: 4
Analyzing in dataset ' AEDB.CEA ' the association of ' MCP1_pg_ug_2015_rank ' with ' IPH ' ...
Collecting data...
We have collected the following and summarize it in an object:
Dataset...................: AEDB.CEA
Score/Exposure/biomarker..: MCP1_pg_ug_2015_rank
Trait/outcome.............: IPH
Effect size...............: -0.128693
Standard error............: 0.060765
Odds ratio (effect size)..: 0.879
Lower 95% CI..............: 0.781
Upper 95% CI..............: 0.99
Z-value...................: -2.117883
P-value...................: 0.03418496
Hosmer and Lemeshow r^2...: 0.016998
Cox and Snell r^2.........: 0.022471
Nagelkerke's pseudo r^2...: 0.030474
Sample size of AE DB......: 2421
Sample size of model......: 1176
Missing data %............: 51.42503
# Progress message: the collected results table is about to get its column names.
cat("Edit the column names...\n")
Edit the column names...
# Label the 16 result columns of the logistic-model table (three pseudo-r^2
# columns instead of r^2 / adjusted r^2), then announce the type-conversion step.
names(GLM.results) <- c(
  "Dataset", "Predictor", "Trait",
  "Beta", "s.e.m.",
  "OR", "low95CI", "up95CI",
  "Z-value", "P-value",
  "r^2_l", "r^2_cs", "r^2_nagelkerke",
  "AE_N", "Model_N", "Perc_Miss"
)
cat("Correct the variable types...\n")
Correct the variable types...
# Convert every statistic column to numeric; Dataset, Predictor and Trait are
# left as they are. local() keeps the helper names out of the workspace.
GLM.results <- local({
  converted <- GLM.results
  for (numeric_column in c("Beta", "s.e.m.", "OR", "low95CI", "up95CI",
                           "Z-value", "P-value",
                           "r^2_l", "r^2_cs", "r^2_nagelkerke",
                           "AE_N", "Model_N", "Perc_Miss")) {
    converted[[numeric_column]] <- as.numeric(converted[[numeric_column]])
  }
  converted
})
# Save the data
cat("Writing results to Excel-file...\n")
Writing results to Excel-file...
### Model 1, binary traits: export the results table to a date-stamped workbook
write.xlsx(
  GLM.results,
  sheetName = "Bin.Uni.PlaquePheno",
  col.names = TRUE,
  row.names = FALSE,
  file = paste0(OUT_loc, "/",Today,".AEDB.CEA.Bin.Uni.Protein.PlaquePhenotypes.RANK.MODEL1.xlsx")
)
# Removing intermediates
cat("Removing intermediate files...\n")
Removing intermediate files...
# Drop the per-analysis working objects so the next analysis starts clean.
rm(list = c("TRAIT", "trait", "currentDF", "GLM.results",
            "GLM.results.TEMP", "fit", "model_step"))
In this model we correct for Age, Gender, Hypertension status, Diabetes status, smoking status (current, ex-, or never smoker), lipid-lowering drugs (LLDs), antiplatelet medication, eGFR (MDRD), BMI, MedHx_CVD (combination of CAD history, stroke history, and peripheral interventions), and stenosis.
Here we use the inverse-rank normalized data - visually this is more normally distributed.
Analysis of continuous/quantitative plaque traits as a function of serum/plaque MCP1 levels.
# Start from an empty 15-column results table; one row is appended per
# protein/trait model in the loop that follows.
GLM.results <- data.frame(matrix(data = NA, nrow = 0, ncol = 15))
cat("Running linear regression...\n")
Running linear regression...
# Model 2, continuous plaque traits: for every protein/trait pair, fit a linear
# model adjusted for the COVARIATES_M2 variables, print the stepAIC-selected
# model and the full model summary, and append what GLM.CON() returns as one
# row of GLM.results.
# NOTE(review): the fit below has the protein as the response and the plaque
# trait as the first predictor, whereas the section heading ("plaque traits as a
# function of ... MCP1 levels") and the labels passed to GLM.CON() treat the
# protein as the exposure - confirm that this direction is intended.
for (protein in seq_along(TRAITS.PROTEIN.RANK)) { # seq_along(): no iteration on an empty vector
  PROTEIN <- TRAITS.PROTEIN.RANK[protein]
  cat(paste0("\nAnalysis of ",PROTEIN,".\n"))
  for (trait in seq_along(TRAITS.CON.RANK)) {
    TRAIT <- TRAITS.CON.RANK[trait]
    cat(paste0("\n- processing ",TRAIT,"\n\n"))
    # Keep only the model variables, drop rows with missing values, then drop
    # rows holding an infinite value in any numeric column.
    # all_of() makes the selection by external character vectors explicit and
    # silences the tidyselect ambiguity note.
    currentDF <- as.data.frame(AEDB.CEA %>%
      dplyr::select(., tidyselect::all_of(c(PROTEIN, TRAIT, COVARIATES_M2))) %>%
      filter(complete.cases(.))) %>%
      filter_if(~is.numeric(.), all_vars(!is.infinite(.)))
    # for debug
    # print(DT::datatable(currentDF))
    # print(nrow(currentDF))
    # print(str(currentDF))
    ### model 2: multivariable; the covariates are written out here and must be
    ### kept in step with COVARIATES_M2, which drives the column selection above
    fit <- lm(currentDF[,PROTEIN] ~ currentDF[,TRAIT] + Age + Gender +
                Hypertension.composite + DiabetesStatus + SmokerStatus +
                Med.Statin.LLD + Med.all.antiplatelet + GFR_MDRD + BMI +
                MedHx_CVD + stenose,
              data = currentDF)
    # The stepwise-selected model is printed for reference only; the full model
    # 'fit' is the one that is summarised and stored.
    model_step <- stepAIC(fit, direction = "both", trace = FALSE)
    print(model_step)
    print(summary(fit))
    GLM.results.TEMP <- data.frame(matrix(NA, ncol = 15, nrow = 0))
    GLM.results.TEMP[1,] <- GLM.CON(fit, "AEDB.CEA", PROTEIN, TRAIT, verbose = TRUE)
    GLM.results <- rbind(GLM.results, GLM.results.TEMP)
  }
}
Analysis of MCP1_pg_ug_2015_rank.
- processing Macrophages_rank
Note: Using an external vector in selections is ambiguous.
[34mℹ[39m Use `all_of(COVARIATES_M2)` instead of `COVARIATES_M2` to silence this message.
[34mℹ[39m See <https://tidyselect.r-lib.org/reference/faq-external-vector.html>.
[90mThis message is displayed once per session.[39m
Call:
lm(formula = currentDF[, PROTEIN] ~ currentDF[, TRAIT] + Hypertension.composite +
Med.Statin.LLD, data = currentDF)
Coefficients:
(Intercept) currentDF[, TRAIT] Hypertension.compositeyes Med.Statin.LLDyes
0.30069 -0.05373 -0.20206 -0.14604
Call:
lm(formula = currentDF[, PROTEIN] ~ currentDF[, TRAIT] + Age +
Gender + Hypertension.composite + DiabetesStatus + SmokerStatus +
Med.Statin.LLD + Med.all.antiplatelet + GFR_MDRD + BMI +
MedHx_CVD + stenose, data = currentDF)
Residuals:
Min 1Q Median 3Q Max
-3.3053 -0.6548 -0.0242 0.6293 3.3436
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 0.2390578 0.6042876 0.396 0.6925
currentDF[, TRAIT] -0.0612856 0.0317785 -1.929 0.0541 .
Age -0.0022303 0.0039334 -0.567 0.5708
Gendermale 0.0974448 0.0709000 1.374 0.1696
Hypertension.compositeyes -0.2024463 0.0966092 -2.096 0.0364 *
DiabetesStatusDiabetes -0.0289768 0.0772772 -0.375 0.7078
SmokerStatusEx-smoker -0.0230909 0.0731098 -0.316 0.7522
SmokerStatusNever smoked 0.1337644 0.1030877 1.298 0.1947
Med.Statin.LLDyes -0.1502085 0.0774112 -1.940 0.0526 .
Med.all.antiplateletyes 0.0018191 0.1086828 0.017 0.9866
GFR_MDRD -0.0005437 0.0016784 -0.324 0.7461
BMI -0.0026857 0.0088250 -0.304 0.7609
MedHx_CVDyes 0.0199269 0.0664034 0.300 0.7642
stenose50-70% 0.2704089 0.4317030 0.626 0.5312
stenose70-90% 0.3081981 0.4140028 0.744 0.4568
stenose90-99% 0.1802418 0.4140082 0.435 0.6634
stenose100% (Occlusion) -0.2457719 0.5317403 -0.462 0.6440
stenose50-99% 0.4778476 0.6484366 0.737 0.4613
stenose70-99% 0.4432860 0.5811508 0.763 0.4458
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 1.002 on 1000 degrees of freedom
Multiple R-squared: 0.0227, Adjusted R-squared: 0.005108
F-statistic: 1.29 on 18 and 1000 DF, p-value: 0.1851
Analyzing in dataset ' AEDB.CEA ' the association of ' MCP1_pg_ug_2015_rank ' with ' Macrophages_rank ' .
Collecting data.
We have collected the following and summarize it in an object:
Dataset...................: AEDB.CEA
Score/Exposure/biomarker..: MCP1_pg_ug_2015_rank
Trait/outcome.............: Macrophages_rank
Effect size...............: -0.061286
Standard error............: 0.031778
Odds ratio (effect size)..: 0.941
Lower 95% CI..............: 0.884
Upper 95% CI..............: 1.001
T-value...................: -1.928524
P-value...................: 0.05407278
R^2.......................: 0.0227
Adjusted r^2..............: 0.005108
Sample size of AE DB......: 2421
Sample size of model......: 1019
Missing data %............: 57.90995
- processing SMC_rank
Call:
lm(formula = currentDF[, PROTEIN] ~ currentDF[, TRAIT] + Hypertension.composite +
Med.Statin.LLD, data = currentDF)
Coefficients:
(Intercept) currentDF[, TRAIT] Hypertension.compositeyes Med.Statin.LLDyes
0.3084 -0.1064 -0.2034 -0.1530
Call:
lm(formula = currentDF[, PROTEIN] ~ currentDF[, TRAIT] + Age +
Gender + Hypertension.composite + DiabetesStatus + SmokerStatus +
Med.Statin.LLD + Med.all.antiplatelet + GFR_MDRD + BMI +
MedHx_CVD + stenose, data = currentDF)
Residuals:
Min 1Q Median 3Q Max
-3.3130 -0.6745 -0.0099 0.6299 3.3696
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 0.3219731 0.6046171 0.533 0.59448
currentDF[, TRAIT] -0.1029968 0.0329730 -3.124 0.00184 **
Age -0.0034454 0.0039631 -0.869 0.38485
Gendermale 0.0487194 0.0717747 0.679 0.49743
Hypertension.compositeyes -0.1954166 0.0965630 -2.024 0.04327 *
DiabetesStatusDiabetes -0.0219028 0.0772121 -0.284 0.77672
SmokerStatusEx-smoker -0.0209122 0.0731694 -0.286 0.77509
SmokerStatusNever smoked 0.1054769 0.1030387 1.024 0.30624
Med.Statin.LLDyes -0.1609021 0.0774630 -2.077 0.03804 *
Med.all.antiplateletyes -0.0039324 0.1085928 -0.036 0.97112
GFR_MDRD -0.0004018 0.0016792 -0.239 0.81092
BMI -0.0026011 0.0088289 -0.295 0.76836
MedHx_CVDyes 0.0116355 0.0664377 0.175 0.86101
stenose50-70% 0.3063875 0.4313296 0.710 0.47766
stenose70-90% 0.3397682 0.4136695 0.821 0.41164
stenose90-99% 0.2223931 0.4138010 0.537 0.59108
stenose100% (Occlusion) -0.1771374 0.5313862 -0.333 0.73894
stenose50-99% 0.5447894 0.6479149 0.841 0.40064
stenose70-99% 0.4871557 0.5806444 0.839 0.40168
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 1.001 on 996 degrees of freedom
Multiple R-squared: 0.02833, Adjusted R-squared: 0.01077
F-statistic: 1.613 on 18 and 996 DF, p-value: 0.05023
Analyzing in dataset ' AEDB.CEA ' the association of ' MCP1_pg_ug_2015_rank ' with ' SMC_rank ' .
Collecting data.
We have collected the following and summarize it in an object:
Dataset...................: AEDB.CEA
Score/Exposure/biomarker..: MCP1_pg_ug_2015_rank
Trait/outcome.............: SMC_rank
Effect size...............: -0.102997
Standard error............: 0.032973
Odds ratio (effect size)..: 0.902
Lower 95% CI..............: 0.846
Upper 95% CI..............: 0.962
T-value...................: -3.123675
P-value...................: 0.00183768
R^2.......................: 0.028328
Adjusted r^2..............: 0.010768
Sample size of AE DB......: 2421
Sample size of model......: 1015
Missing data %............: 58.07518
- processing VesselDensity_rank
Call:
lm(formula = currentDF[, PROTEIN] ~ currentDF[, TRAIT] + Hypertension.composite +
Med.Statin.LLD, data = currentDF)
Coefficients:
(Intercept) currentDF[, TRAIT] Hypertension.compositeyes Med.Statin.LLDyes
0.2912 -0.1439 -0.1849 -0.1662
Call:
lm(formula = currentDF[, PROTEIN] ~ currentDF[, TRAIT] + Age +
Gender + Hypertension.composite + DiabetesStatus + SmokerStatus +
Med.Statin.LLD + Med.all.antiplatelet + GFR_MDRD + BMI +
MedHx_CVD + stenose, data = currentDF)
Residuals:
Min 1Q Median 3Q Max
-3.1614 -0.6484 -0.0233 0.6402 3.4332
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 0.1767459 0.6402229 0.276 0.7826
currentDF[, TRAIT] -0.1420732 0.0332558 -4.272 2.13e-05 ***
Age -0.0009175 0.0040589 -0.226 0.8212
Gendermale 0.1109606 0.0733445 1.513 0.1307
Hypertension.compositeyes -0.1942298 0.1002766 -1.937 0.0531 .
DiabetesStatusDiabetes -0.0328877 0.0819413 -0.401 0.6882
SmokerStatusEx-smoker -0.0313111 0.0760495 -0.412 0.6806
SmokerStatusNever smoked 0.1139310 0.1071394 1.063 0.2879
Med.Statin.LLDyes -0.1671740 0.0796915 -2.098 0.0362 *
Med.all.antiplateletyes 0.0397565 0.1144791 0.347 0.7285
GFR_MDRD -0.0010896 0.0017515 -0.622 0.5340
BMI -0.0016953 0.0091572 -0.185 0.8532
MedHx_CVDyes 0.0074801 0.0688297 0.109 0.9135
stenose50-70% 0.1841594 0.4727540 0.390 0.6970
stenose70-90% 0.2563949 0.4542951 0.564 0.5726
stenose90-99% 0.1436685 0.4538378 0.317 0.7516
stenose100% (Occlusion) -0.2562619 0.5637405 -0.455 0.6495
stenose50-99% 0.5911304 0.6754523 0.875 0.3817
stenose70-99% 0.1673943 0.6766921 0.247 0.8047
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 1.005 on 932 degrees of freedom
Multiple R-squared: 0.03977, Adjusted R-squared: 0.02122
F-statistic: 2.144 on 18 and 932 DF, p-value: 0.003707
Analyzing in dataset ' AEDB.CEA ' the association of ' MCP1_pg_ug_2015_rank ' with ' VesselDensity_rank ' .
Collecting data.
We have collected the following and summarize it in an object:
Dataset...................: AEDB.CEA
Score/Exposure/biomarker..: MCP1_pg_ug_2015_rank
Trait/outcome.............: VesselDensity_rank
Effect size...............: -0.142073
Standard error............: 0.033256
Odds ratio (effect size)..: 0.868
Lower 95% CI..............: 0.813
Upper 95% CI..............: 0.926
T-value...................: -4.272134
P-value...................: 2.134847e-05
R^2.......................: 0.039768
Adjusted r^2..............: 0.021223
Sample size of AE DB......: 2421
Sample size of model......: 951
Missing data %............: 60.71871
# Progress message: the collected results table is about to get its column names.
cat("Edit the column names...\n")
Edit the column names...
# Label the 15 result columns of the linear-model table, then announce the
# type-conversion step.
names(GLM.results) <- c(
  "Dataset", "Predictor", "Trait",
  "Beta", "s.e.m.",
  "OR", "low95CI", "up95CI",
  "T-value", "P-value",
  "r^2", "r^2_adj",
  "AE_N", "Model_N", "Perc_Miss"
)
cat("Correct the variable types...\n")
Correct the variable types...
# Convert every statistic column to numeric; Dataset, Predictor and Trait are
# left as they are. local() keeps the helper names out of the workspace.
GLM.results <- local({
  converted <- GLM.results
  for (numeric_column in c("Beta", "s.e.m.", "OR", "low95CI", "up95CI",
                           "T-value", "P-value", "r^2", "r^2_adj",
                           "AE_N", "Model_N", "Perc_Miss")) {
    converted[[numeric_column]] <- as.numeric(converted[[numeric_column]])
  }
  converted
})
# Save the data
cat("Writing results to Excel-file...\n")
Writing results to Excel-file...
### Model 2 (multivariable), continuous traits: export the results table to a
### date-stamped workbook
library(openxlsx)
write.xlsx(
  GLM.results,
  sheetName = "Con.Multi.PlaquePheno",
  col.names = TRUE,
  row.names = FALSE,
  file = paste0(OUT_loc, "/",Today,".AEDB.CEA.Con.Multi.Protein.PlaquePhenotypes.RANK.MODEL2.xlsx")
)
# Removing intermediates
cat("Removing intermediate files...\n")
Removing intermediate files...
# Drop the per-analysis working objects so the next analysis starts clean.
rm(list = c("TRAIT", "trait", "currentDF", "GLM.results",
            "GLM.results.TEMP", "fit", "model_step"))
Analysis of binary plaque traits as a function of serum/plaque MCP1 levels.
# Model 2, binary plaque traits: for every protein/trait pair, fit a logistic
# regression of the trait on the protein level adjusted for the COVARIATES_M2
# variables, print the stepAIC-selected model and the full model summary, and
# append what GLM.BIN() returns as one row of the 16-column GLM.results table.
# NOTE(review): in the printed CalcificationPlaque summary one 'stenose' level
# has an estimate of about -14 with a standard error of about 419 and the fit
# needs 13 Fisher scoring iterations - this looks like a sparse factor level
# (separation); consider checking the level counts of 'stenose'.
GLM.results <- data.frame(matrix(NA, ncol = 16, nrow = 0))
for (protein in seq_along(TRAITS.PROTEIN.RANK)) { # seq_along(): no iteration on an empty vector
  PROTEIN <- TRAITS.PROTEIN.RANK[protein]
  cat(paste0("\nAnalysis of ",PROTEIN,".\n"))
  for (trait in seq_along(TRAITS.BIN)) {
    TRAIT <- TRAITS.BIN[trait]
    cat(paste0("\n- processing ",TRAIT,"\n\n"))
    # Keep only the model variables, drop rows with missing values, then drop
    # rows holding an infinite value in any numeric column.
    # all_of() makes the selection by external character vectors explicit and
    # silences the tidyselect ambiguity note.
    currentDF <- as.data.frame(AEDB.CEA %>%
      dplyr::select(., tidyselect::all_of(c(PROTEIN, TRAIT, COVARIATES_M2))) %>%
      filter(complete.cases(.))) %>%
      filter_if(~is.numeric(.), all_vars(!is.infinite(.)))
    # for debug
    # print(DT::datatable(currentDF))
    # print(nrow(currentDF))
    # print(str(currentDF))
    # print(class(currentDF[,TRAIT]))
    ### model 2: multivariable; the covariates are written out here and must be
    ### kept in step with COVARIATES_M2, which drives the column selection above
    fit <- glm(as.factor(currentDF[,TRAIT]) ~ currentDF[,PROTEIN] + Age + Gender +
                 Hypertension.composite + DiabetesStatus + SmokerStatus +
                 Med.Statin.LLD + Med.all.antiplatelet + GFR_MDRD + BMI +
                 MedHx_CVD + stenose,
               data = currentDF, family = binomial(link = "logit"))
    # The stepwise-selected model is printed for reference only; the full model
    # 'fit' is the one that is summarised and stored.
    model_step <- stepAIC(fit, direction = "both", trace = FALSE)
    print(model_step)
    print(summary(fit))
    GLM.results.TEMP <- data.frame(matrix(NA, ncol = 16, nrow = 0))
    GLM.results.TEMP[1,] <- GLM.BIN(fit, "AEDB.CEA", PROTEIN, TRAIT, verbose = TRUE)
    GLM.results <- rbind(GLM.results, GLM.results.TEMP)
  }
}
Analysis of MCP1_pg_ug_2015_rank.
- processing CalcificationPlaque
Call: glm(formula = as.factor(currentDF[, TRAIT]) ~ currentDF[, PROTEIN] +
Age + SmokerStatus + stenose, family = binomial(link = "logit"),
data = currentDF)
Coefficients:
(Intercept) currentDF[, PROTEIN] Age SmokerStatusEx-smoker SmokerStatusNever smoked stenose50-70% stenose70-90%
-1.16173 -0.47346 0.01896 -0.40609 -0.44306 -0.56758 -0.01349
stenose90-99% stenose100% (Occlusion) stenose50-99% stenose70-99%
0.18538 1.15456 -14.43672 -0.79787
Degrees of Freedom: 1023 Total (i.e. Null); 1013 Residual
Null Deviance: 1417
Residual Deviance: 1335 AIC: 1357
Call:
glm(formula = as.factor(currentDF[, TRAIT]) ~ currentDF[, PROTEIN] +
Age + Gender + Hypertension.composite + DiabetesStatus +
SmokerStatus + Med.Statin.LLD + Med.all.antiplatelet + GFR_MDRD +
BMI + MedHx_CVD + stenose, family = binomial(link = "logit"),
data = currentDF)
Deviance Residuals:
Min 1Q Median 3Q Max
-1.8447 -1.0810 -0.7025 1.1078 2.0019
Coefficients:
Estimate Std. Error z value Pr(>|z|)
(Intercept) -1.522e+00 1.248e+00 -1.220 0.22262
currentDF[, PROTEIN] -4.740e-01 6.875e-02 -6.895 5.38e-12 ***
Age 1.797e-02 8.224e-03 2.185 0.02892 *
Gendermale -9.953e-02 1.469e-01 -0.677 0.49809
Hypertension.compositeyes 2.389e-01 2.017e-01 1.185 0.23618
DiabetesStatusDiabetes -2.266e-01 1.619e-01 -1.400 0.16160
SmokerStatusEx-smoker -4.096e-01 1.525e-01 -2.687 0.00722 **
SmokerStatusNever smoked -4.851e-01 2.157e-01 -2.249 0.02454 *
Med.Statin.LLDyes -1.636e-01 1.607e-01 -1.019 0.30842
Med.all.antiplateletyes -7.738e-02 2.248e-01 -0.344 0.73071
GFR_MDRD -5.302e-04 3.511e-03 -0.151 0.87997
BMI 2.166e-02 1.839e-02 1.178 0.23892
MedHx_CVDyes -1.660e-02 1.377e-01 -0.121 0.90406
stenose50-70% -5.216e-01 8.838e-01 -0.590 0.55510
stenose70-90% 9.803e-03 8.419e-01 0.012 0.99071
stenose90-99% 2.028e-01 8.420e-01 0.241 0.80969
stenose100% (Occlusion) 1.181e+00 1.175e+00 1.005 0.31496
stenose50-99% -1.444e+01 4.192e+02 -0.034 0.97252
stenose70-99% -7.533e-01 1.208e+00 -0.624 0.53294
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
(Dispersion parameter for binomial family taken to be 1)
Null deviance: 1417.5 on 1023 degrees of freedom
Residual deviance: 1329.0 on 1005 degrees of freedom
AIC: 1367
Number of Fisher Scoring iterations: 13
Analyzing in dataset ' AEDB.CEA ' the association of ' MCP1_pg_ug_2015_rank ' with ' CalcificationPlaque ' ...
Collecting data...
We have collected the following and summarize it in an object:
Dataset...................: AEDB.CEA
Score/Exposure/biomarker..: MCP1_pg_ug_2015_rank
Trait/outcome.............: CalcificationPlaque
Effect size...............: -0.474031
Standard error............: 0.068749
Odds ratio (effect size)..: 0.622
Lower 95% CI..............: 0.544
Upper 95% CI..............: 0.712
Z-value...................: -6.895134
P-value...................: 5.381419e-12
Hosmer and Lemeshow r^2...: 0.062418
Cox and Snell r^2.........: 0.082776
Nagelkerke's pseudo r^2...: 0.110443
Sample size of AE DB......: 2421
Sample size of model......: 1024
Missing data %............: 57.70343
- processing CollagenPlaque
Call: glm(formula = as.factor(currentDF[, TRAIT]) ~ currentDF[, PROTEIN] +
SmokerStatus + BMI + MedHx_CVD, family = binomial(link = "logit"),
data = currentDF)
Coefficients:
(Intercept) currentDF[, PROTEIN] SmokerStatusEx-smoker SmokerStatusNever smoked BMI MedHx_CVDyes
0.47638 -0.24086 -0.38304 -0.63224 0.03905 0.25051
Degrees of Freedom: 1024 Total (i.e. Null); 1019 Residual
Null Deviance: 1048
Residual Deviance: 1025 AIC: 1037
Call:
glm(formula = as.factor(currentDF[, TRAIT]) ~ currentDF[, PROTEIN] +
Age + Gender + Hypertension.composite + DiabetesStatus +
SmokerStatus + Med.Statin.LLD + Med.all.antiplatelet + GFR_MDRD +
BMI + MedHx_CVD + stenose, family = binomial(link = "logit"),
data = currentDF)
Deviance Residuals:
Min 1Q Median 3Q Max
-2.2698 0.4534 0.6199 0.7192 1.1840
Coefficients:
Estimate Std. Error z value Pr(>|z|)
(Intercept) 1.385e+01 9.628e+02 0.014 0.98852
currentDF[, PROTEIN] -2.330e-01 7.922e-02 -2.941 0.00327 **
Age 1.362e-02 9.731e-03 1.400 0.16155
Gendermale -4.542e-02 1.763e-01 -0.258 0.79672
Hypertension.compositeyes 2.446e-01 2.286e-01 1.070 0.28467
DiabetesStatusDiabetes 7.902e-02 1.972e-01 0.401 0.68861
SmokerStatusEx-smoker -4.387e-01 1.893e-01 -2.317 0.02051 *
SmokerStatusNever smoked -7.570e-01 2.485e-01 -3.046 0.00232 **
Med.Statin.LLDyes 2.795e-03 1.921e-01 0.015 0.98839
Med.all.antiplateletyes 2.526e-01 2.597e-01 0.973 0.33067
GFR_MDRD 5.011e-03 4.226e-03 1.186 0.23576
BMI 4.166e-02 2.314e-02 1.801 0.07175 .
MedHx_CVDyes 2.219e-01 1.635e-01 1.357 0.17468
stenose50-70% -1.480e+01 9.628e+02 -0.015 0.98774
stenose70-90% -1.509e+01 9.628e+02 -0.016 0.98750
stenose90-99% -1.519e+01 9.628e+02 -0.016 0.98741
stenose100% (Occlusion) 1.169e-01 1.242e+03 0.000 0.99992
stenose50-99% -3.410e-02 1.515e+03 0.000 0.99998
stenose70-99% -1.464e+01 9.628e+02 -0.015 0.98787
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
(Dispersion parameter for binomial family taken to be 1)
Null deviance: 1047.6 on 1024 degrees of freedom
Residual deviance: 1010.2 on 1006 degrees of freedom
AIC: 1048.2
Number of Fisher Scoring iterations: 15
Analyzing in dataset ' AEDB.CEA ' the association of ' MCP1_pg_ug_2015_rank ' with ' CollagenPlaque ' ...
Collecting data...
We have collected the following and summarize it in an object:
Dataset...................: AEDB.CEA
Score/Exposure/biomarker..: MCP1_pg_ug_2015_rank
Trait/outcome.............: CollagenPlaque
Effect size...............: -0.232976
Standard error............: 0.079216
Odds ratio (effect size)..: 0.792
Lower 95% CI..............: 0.678
Upper 95% CI..............: 0.925
Z-value...................: -2.941031
P-value...................: 0.003271218
Hosmer and Lemeshow r^2...: 0.035759
Cox and Snell r^2.........: 0.035889
Nagelkerke's pseudo r^2...: 0.056063
Sample size of AE DB......: 2421
Sample size of model......: 1025
Missing data %............: 57.66212
- processing Fat10Perc
Call: glm(formula = as.factor(currentDF[, TRAIT]) ~ currentDF[, PROTEIN] +
Gender + SmokerStatus + stenose, family = binomial(link = "logit"),
data = currentDF)
Coefficients:
(Intercept) currentDF[, PROTEIN] Gendermale SmokerStatusEx-smoker SmokerStatusNever smoked stenose50-70% stenose70-90%
13.8072 0.1608 0.9217 -0.2543 0.3188 -13.4987 -13.3997
stenose90-99% stenose100% (Occlusion) stenose50-99% stenose70-99%
-13.2648 -13.8075 -15.8024 -14.6344
Degrees of Freedom: 1024 Total (i.e. Null); 1014 Residual
Null Deviance: 1208
Residual Deviance: 1153 AIC: 1175
Call:
glm(formula = as.factor(currentDF[, TRAIT]) ~ currentDF[, PROTEIN] +
Age + Gender + Hypertension.composite + DiabetesStatus +
SmokerStatus + Med.Statin.LLD + Med.all.antiplatelet + GFR_MDRD +
BMI + MedHx_CVD + stenose, family = binomial(link = "logit"),
data = currentDF)
Deviance Residuals:
Min 1Q Median 3Q Max
-2.0874 -1.2279 0.6773 0.7958 1.5145
Coefficients:
Estimate Std. Error z value Pr(>|z|)
(Intercept) 1.323e+01 3.540e+02 0.037 0.9702
currentDF[, PROTEIN] 1.538e-01 7.309e-02 2.105 0.0353 *
Age 7.892e-03 8.954e-03 0.881 0.3781
Gendermale 9.325e-01 1.563e-01 5.966 2.43e-09 ***
Hypertension.compositeyes -3.351e-02 2.222e-01 -0.151 0.8802
DiabetesStatusDiabetes -1.599e-01 1.759e-01 -0.909 0.3634
SmokerStatusEx-smoker -2.887e-01 1.683e-01 -1.716 0.0862 .
SmokerStatusNever smoked 2.644e-01 2.474e-01 1.069 0.2852
Med.Statin.LLDyes -2.292e-01 1.845e-01 -1.242 0.2141
Med.all.antiplateletyes 1.095e-01 2.495e-01 0.439 0.6608
GFR_MDRD 4.634e-04 3.879e-03 0.119 0.9049
BMI 4.721e-03 1.983e-02 0.238 0.8118
MedHx_CVDyes 7.604e-02 1.518e-01 0.501 0.6164
stenose50-70% -1.352e+01 3.539e+02 -0.038 0.9695
stenose70-90% -1.339e+01 3.539e+02 -0.038 0.9698
stenose90-99% -1.327e+01 3.539e+02 -0.037 0.9701
stenose100% (Occlusion) -1.382e+01 3.540e+02 -0.039 0.9689
stenose50-99% -1.581e+01 3.540e+02 -0.045 0.9644
stenose70-99% -1.472e+01 3.540e+02 -0.042 0.9668
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
(Dispersion parameter for binomial family taken to be 1)
Null deviance: 1207.9 on 1024 degrees of freedom
Residual deviance: 1149.4 on 1006 degrees of freedom
AIC: 1187.4
Number of Fisher Scoring iterations: 13
Analyzing in dataset ' AEDB.CEA ' the association of ' MCP1_pg_ug_2015_rank ' with ' Fat10Perc ' ...
Collecting data...
We have collected the following and summarize it in an object:
Dataset...................: AEDB.CEA
Score/Exposure/biomarker..: MCP1_pg_ug_2015_rank
Trait/outcome.............: Fat10Perc
Effect size...............: 0.153844
Standard error............: 0.073089
Odds ratio (effect size)..: 1.166
Lower 95% CI..............: 1.011
Upper 95% CI..............: 1.346
Z-value...................: 2.104886
P-value...................: 0.03530121
Hosmer and Lemeshow r^2...: 0.048471
Cox and Snell r^2.........: 0.055521
Nagelkerke's pseudo r^2...: 0.080203
Sample size of AE DB......: 2421
Sample size of model......: 1025
Missing data %............: 57.66212
- processing IPH
Call: glm(formula = as.factor(currentDF[, TRAIT]) ~ currentDF[, PROTEIN] +
Gender + Med.Statin.LLD + BMI + MedHx_CVD, family = binomial(link = "logit"),
data = currentDF)
Coefficients:
(Intercept) currentDF[, PROTEIN] Gendermale Med.Statin.LLDyes BMI MedHx_CVDyes
-0.71077 -0.14275 0.51657 -0.27539 0.03056 0.36067
Degrees of Freedom: 1022 Total (i.e. Null); 1017 Residual
Null Deviance: 1367
Residual Deviance: 1337 AIC: 1349
Call:
glm(formula = as.factor(currentDF[, TRAIT]) ~ currentDF[, PROTEIN] +
Age + Gender + Hypertension.composite + DiabetesStatus +
SmokerStatus + Med.Statin.LLD + Med.all.antiplatelet + GFR_MDRD +
BMI + MedHx_CVD + stenose, family = binomial(link = "logit"),
data = currentDF)
Deviance Residuals:
Min 1Q Median 3Q Max
-1.9526 -1.2816 0.8168 0.9931 1.5440
Coefficients:
Estimate Std. Error z value Pr(>|z|)
(Intercept) -0.3744407 1.2806515 -0.292 0.7700
currentDF[, PROTEIN] -0.1420106 0.0661286 -2.147 0.0318 *
Age 0.0002745 0.0081696 0.034 0.9732
Gendermale 0.5755452 0.1456998 3.950 7.81e-05 ***
Hypertension.compositeyes -0.1304980 0.2014410 -0.648 0.5171
DiabetesStatusDiabetes -0.1239874 0.1610422 -0.770 0.4414
SmokerStatusEx-smoker -0.1106836 0.1537985 -0.720 0.4717
SmokerStatusNever smoked -0.1474556 0.2125316 -0.694 0.4878
Med.Statin.LLDyes -0.2598492 0.1641604 -1.583 0.1134
Med.all.antiplateletyes 0.0938507 0.2263983 0.415 0.6785
GFR_MDRD -0.0048629 0.0035299 -1.378 0.1683
BMI 0.0346557 0.0186016 1.863 0.0625 .
MedHx_CVDyes 0.3433350 0.1371721 2.503 0.0123 *
stenose50-70% -0.3005803 0.9270679 -0.324 0.7458
stenose70-90% -0.1234783 0.8929528 -0.138 0.8900
stenose90-99% 0.1233878 0.8933095 0.138 0.8901
stenose100% (Occlusion) -0.3672092 1.1204001 -0.328 0.7431
stenose50-99% -0.5109985 1.3494741 -0.379 0.7049
stenose70-99% 1.3028582 1.4085773 0.925 0.3550
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
(Dispersion parameter for binomial family taken to be 1)
Null deviance: 1367.4 on 1022 degrees of freedom
Residual deviance: 1325.7 on 1004 degrees of freedom
AIC: 1363.7
Number of Fisher Scoring iterations: 4
Analyzing in dataset ' AEDB.CEA ' the association of ' MCP1_pg_ug_2015_rank ' with ' IPH ' ...
Collecting data...
We have collected the following and summarize it in an object:
Dataset...................: AEDB.CEA
Score/Exposure/biomarker..: MCP1_pg_ug_2015_rank
Trait/outcome.............: IPH
Effect size...............: -0.142011
Standard error............: 0.066129
Odds ratio (effect size)..: 0.868
Lower 95% CI..............: 0.762
Upper 95% CI..............: 0.988
Z-value...................: -2.14749
P-value...................: 0.03175426
Hosmer and Lemeshow r^2...: 0.030475
Cox and Snell r^2.........: 0.039915
Nagelkerke's pseudo r^2...: 0.054139
Sample size of AE DB......: 2421
Sample size of model......: 1023
Missing data %............: 57.74473
cat("Edit the column names...\n")
Edit the column names...
# Label the 16 result columns: identifiers, effect estimates with their
# confidence bounds, test statistics, pseudo-R^2 measures, and sample sizes.
colnames(GLM.results) <- c(
  "Dataset", "Predictor", "Trait",
  "Beta", "s.e.m.",
  "OR", "low95CI", "up95CI",
  "Z-value", "P-value",
  "r^2_l", "r^2_cs", "r^2_nagelkerke",
  "AE_N", "Model_N", "Perc_Miss"
)
cat("Correct the variable types...\n")
Correct the variable types...
# Convert every statistic column (all columns except Dataset, Predictor and
# Trait) to numeric. local() keeps the helper vector out of the workspace.
GLM.results <- local({
  stat.columns <- c(
    "Beta", "s.e.m.", "OR", "low95CI", "up95CI",
    "Z-value", "P-value",
    "r^2_l", "r^2_cs", "r^2_nagelkerke",
    "AE_N", "Model_N", "Perc_Miss"
  )
  GLM.results[stat.columns] <- lapply(GLM.results[stat.columns], as.numeric)
  GLM.results
})
# Save the data
cat("Writing results to Excel-file...\n")
Writing results to Excel-file...
### Multivariable (MODEL2): binary plaque phenotypes
# Export the results table to an Excel workbook in OUT_loc; the file name is
# prefixed with the Today datestamp.
write.xlsx(GLM.results,
file = paste0(OUT_loc, "/",Today,".AEDB.CEA.Bin.Multi.Protein.PlaquePhenotypes.RANK.MODEL2.xlsx"),
row.names = FALSE, col.names = TRUE, sheetName = "Bin.Multi.PlaquePheno")
# Removing intermediates
cat("Removing intermediate files...\n")
Removing intermediate files...
rm(TRAIT, trait, currentDF, GLM.results, GLM.results.TEMP, fit, model_step)
We will perform a cross-sectional analysis between plaque and serum MCP1, IL6, and IL6R levels and the ‘clinical status’ of the plaque in terms of presence of patients’ symptoms (symptomatic vs. asymptomatic). The symptoms of interest are:
In this model we correct for age and gender.
Here we use the inverse-rank normalized data - visually this is more normally distributed.
# Container for the per-protein results: 16 columns, one row per fitted model.
GLM.results <- data.frame(matrix(NA, ncol = 16, nrow = 0))

# seq_along() (instead of 1:length()) makes the loop a no-op when
# TRAITS.PROTEIN.RANK is empty, rather than iterating over c(1, 0).
for (protein in seq_along(TRAITS.PROTEIN.RANK)) {
  PROTEIN <- TRAITS.PROTEIN.RANK[protein]
  cat(paste0("\nAnalysis of ",PROTEIN,".\n"))
  TRAIT <- "AsymptSympt"
  cat(paste0("\n- processing ",TRAIT,"\n\n"))

  # Keep the protein, the outcome and the model 1 covariates; drop rows with
  # missing values, then rows with an infinite value in any numeric column.
  currentDF <- as.data.frame(AEDB.CEA %>%
    dplyr::select(., PROTEIN, TRAIT, COVARIATES_M1) %>%
    filter(complete.cases(.))) %>%
    filter_if(~is.numeric(.), all_vars(!is.infinite(.)))

  # for debug
  # print(DT::datatable(currentDF))
  # print(nrow(currentDF))
  # print(str(currentDF))
  # print(class(currentDF[,TRAIT]))

  # Model 1: logistic regression of the outcome on the protein, adjusted for
  # Age and Gender. The protein must stay the first term of the formula.
  # NOTE(review): GLM.BIN presumably reads the second coefficient row, as
  # GLM.CON does - confirm.
  fit <- glm(as.factor(currentDF[,TRAIT]) ~ currentDF[,PROTEIN] + Age + Gender,
             data = currentDF, family = binomial(link = "logit"))

  # The stepwise-selected model is printed for inspection only; the collected
  # statistics come from the full model `fit`.
  model_step <- stepAIC(fit, direction = "both", trace = FALSE)
  print(model_step)
  print(summary(fit))

  # Collect the model summary as one row and append it to the results.
  GLM.results.TEMP <- data.frame(matrix(NA, ncol = 16, nrow = 0))
  GLM.results.TEMP[1,] <- GLM.BIN(fit, "AEDB.CEA", PROTEIN, TRAIT, verbose = TRUE)
  GLM.results <- rbind(GLM.results, GLM.results.TEMP)
}
Analysis of MCP1_pg_ug_2015_rank.
- processing AsymptSympt
Call: glm(formula = as.factor(currentDF[, TRAIT]) ~ currentDF[, PROTEIN] +
Age + Gender, family = binomial(link = "logit"), data = currentDF)
Coefficients:
(Intercept) currentDF[, PROTEIN] Age Gendermale
0.24776 0.26397 0.03239 -0.43507
Degrees of Freedom: 1195 Total (i.e. Null); 1192 Residual
Null Deviance: 826.5
Residual Deviance: 804.7 AIC: 812.7
Call:
glm(formula = as.factor(currentDF[, TRAIT]) ~ currentDF[, PROTEIN] +
Age + Gender, family = binomial(link = "logit"), data = currentDF)
Deviance Residuals:
Min 1Q Median 3Q Max
-2.5654 0.3738 0.4440 0.5167 0.8434
Coefficients:
Estimate Std. Error z value Pr(>|z|)
(Intercept) 0.24776 0.69360 0.357 0.72093
currentDF[, PROTEIN] 0.26397 0.09376 2.815 0.00487 **
Age 0.03239 0.01011 3.203 0.00136 **
Gendermale -0.43507 0.21792 -1.996 0.04588 *
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
(Dispersion parameter for binomial family taken to be 1)
Null deviance: 826.52 on 1195 degrees of freedom
Residual deviance: 804.69 on 1192 degrees of freedom
AIC: 812.69
Number of Fisher Scoring iterations: 5
Analyzing in dataset ' AEDB.CEA ' the association of ' MCP1_pg_ug_2015_rank ' with ' AsymptSympt ' ...
Collecting data...
We have collected the following and summarize it in an object:
Dataset...................: AEDB.CEA
Score/Exposure/biomarker..: MCP1_pg_ug_2015_rank
Trait/outcome.............: AsymptSympt
Effect size...............: 0.263965
Standard error............: 0.09376
Odds ratio (effect size)..: 1.302
Lower 95% CI..............: 1.083
Upper 95% CI..............: 1.565
Z-value...................: 2.815333
P-value...................: 0.004872671
Hosmer and Lemeshow r^2...: 0.026418
Cox and Snell r^2.........: 0.018091
Nagelkerke's pseudo r^2...: 0.036257
Sample size of AE DB......: 2421
Sample size of model......: 1196
Missing data %............: 50.59893
cat("Edit the column names...\n")
Edit the column names...
# Label the 16 result columns: identifiers, effect estimates with their
# confidence bounds, test statistics, pseudo-R^2 measures, and sample sizes.
colnames(GLM.results) <- c(
  "Dataset", "Predictor", "Trait",
  "Beta", "s.e.m.",
  "OR", "low95CI", "up95CI",
  "Z-value", "P-value",
  "r^2_l", "r^2_cs", "r^2_nagelkerke",
  "AE_N", "Model_N", "Perc_Miss"
)
cat("Correct the variable types...\n")
Correct the variable types...
# Convert every statistic column (all columns except Dataset, Predictor and
# Trait) to numeric. local() keeps the helper vector out of the workspace.
GLM.results <- local({
  stat.columns <- c(
    "Beta", "s.e.m.", "OR", "low95CI", "up95CI",
    "Z-value", "P-value",
    "r^2_l", "r^2_cs", "r^2_nagelkerke",
    "AE_N", "Model_N", "Perc_Miss"
  )
  GLM.results[stat.columns] <- lapply(GLM.results[stat.columns], as.numeric)
  GLM.results
})
# Save the data
cat("Writing results to Excel-file...\n")
Writing results to Excel-file...
### Univariate
# Export the model 1 symptom results to an Excel workbook in OUT_loc; the file
# name is prefixed with the Today datestamp.
write.xlsx(
  GLM.results,
  file = file.path(
    OUT_loc,
    paste0(Today, ".AEDB.CEA.Bin.Uni.Protein.RANK.Symptoms.MODEL1.xlsx")
  ),
  sheetName = "Bin.Uni.Symptoms",
  col.names = TRUE,
  row.names = FALSE
)
# Removing intermediates
cat("Removing intermediate files...\n")
Removing intermediate files...
rm(TRAIT, currentDF, GLM.results, GLM.results.TEMP, fit, model_step)
In this model we correct for age, gender, hypertension status, diabetes status, current smoker status, lipid-lowering drugs (LLDs), antiplatelet medication, eGFR (MDRD), BMI, MedHx_CVD (a combination of CAD history, stroke history, and peripheral interventions), and stenosis.
Here we use the inverse-rank normalized data - visually this is more normally distributed.
# Container for the per-protein results: 16 columns, one row per fitted model.
GLM.results <- data.frame(matrix(NA, ncol = 16, nrow = 0))

# seq_along() (instead of 1:length()) makes the loop a no-op when
# TRAITS.PROTEIN.RANK is empty, rather than iterating over c(1, 0).
for (protein in seq_along(TRAITS.PROTEIN.RANK)) {
  PROTEIN <- TRAITS.PROTEIN.RANK[protein]
  cat(paste0("\nAnalysis of ",PROTEIN,".\n"))
  TRAIT <- "AsymptSympt"
  cat(paste0("\n- processing ",TRAIT,"\n\n"))

  # Keep the protein, the outcome and the model 2 covariates; drop rows with
  # missing values, then rows with an infinite value in any numeric column.
  currentDF <- as.data.frame(AEDB.CEA %>%
    dplyr::select(., PROTEIN, TRAIT, COVARIATES_M2) %>%
    filter(complete.cases(.))) %>%
    filter_if(~is.numeric(.), all_vars(!is.infinite(.)))

  # for debug
  # print(DT::datatable(currentDF))
  # print(nrow(currentDF))
  # print(str(currentDF))
  # print(class(currentDF[,TRAIT]))

  # Model 2 (multivariable): logistic regression of the outcome on the protein,
  # adjusted for the full covariate set. The protein must stay the first term
  # of the formula.
  # NOTE(review): GLM.BIN presumably reads the second coefficient row, as
  # GLM.CON does - confirm.
  # The stray duplicated "+" before `stenose` was removed; the fitted model is
  # unchanged, only the printed call is tidier.
  fit <- glm(as.factor(currentDF[,TRAIT]) ~ currentDF[,PROTEIN] + Age + Gender +
               Hypertension.composite + DiabetesStatus + SmokerStatus +
               Med.Statin.LLD + Med.all.antiplatelet + GFR_MDRD + BMI +
               MedHx_CVD + stenose,
             data = currentDF, family = binomial(link = "logit"))

  # The stepwise-selected model is printed for inspection only; the collected
  # statistics come from the full model `fit`.
  model_step <- stepAIC(fit, direction = "both", trace = FALSE)
  print(model_step)
  print(summary(fit))

  # Collect the model summary as one row and append it to the results.
  GLM.results.TEMP <- data.frame(matrix(NA, ncol = 16, nrow = 0))
  GLM.results.TEMP[1,] <- GLM.BIN(fit, "AEDB.CEA", PROTEIN, TRAIT, verbose = TRUE)
  GLM.results <- rbind(GLM.results, GLM.results.TEMP)
}
Analysis of MCP1_pg_ug_2015_rank.
- processing AsymptSympt
Call: glm(formula = as.factor(currentDF[, TRAIT]) ~ currentDF[, PROTEIN] +
Age + Gender + Med.all.antiplatelet + GFR_MDRD + stenose,
family = binomial(link = "logit"), data = currentDF)
Coefficients:
(Intercept) currentDF[, PROTEIN] Age Gendermale Med.all.antiplateletyes GFR_MDRD stenose50-70%
15.334896 0.272930 0.032194 -0.427140 -0.929815 0.007921 -13.442880
stenose70-90% stenose90-99% stenose100% (Occlusion) stenose50-99% stenose70-99%
-15.004949 -14.749153 -0.339553 -15.814682 -0.541412
Degrees of Freedom: 1035 Total (i.e. Null); 1024 Residual
Null Deviance: 726.5
Residual Deviance: 689.3 AIC: 713.3
Call:
glm(formula = as.factor(currentDF[, TRAIT]) ~ currentDF[, PROTEIN] +
Age + Gender + Hypertension.composite + DiabetesStatus +
SmokerStatus + Med.Statin.LLD + Med.all.antiplatelet + GFR_MDRD +
BMI + MedHx_CVD + +stenose, family = binomial(link = "logit"),
data = currentDF)
Deviance Residuals:
Min 1Q Median 3Q Max
-3.0446 0.3116 0.4390 0.5442 0.9328
Coefficients:
Estimate Std. Error z value Pr(>|z|)
(Intercept) 1.593e+01 9.540e+02 0.017 0.9867
currentDF[, PROTEIN] 2.558e-01 1.018e-01 2.512 0.0120 *
Age 3.508e-02 1.258e-02 2.788 0.0053 **
Gendermale -3.507e-01 2.366e-01 -1.482 0.1383
Hypertension.compositeyes -3.437e-01 3.443e-01 -0.998 0.3182
DiabetesStatusDiabetes -3.937e-02 2.423e-01 -0.162 0.8709
SmokerStatusEx-smoker -3.416e-01 2.335e-01 -1.463 0.1434
SmokerStatusNever smoked 2.040e-04 3.564e-01 0.001 0.9995
Med.Statin.LLDyes -1.797e-01 2.660e-01 -0.676 0.4991
Med.all.antiplateletyes -9.093e-01 4.789e-01 -1.899 0.0576 .
GFR_MDRD 7.160e-03 5.456e-03 1.312 0.1894
BMI -1.027e-02 2.789e-02 -0.368 0.7128
MedHx_CVDyes 9.204e-02 2.096e-01 0.439 0.6606
stenose50-70% -1.338e+01 9.540e+02 -0.014 0.9888
stenose70-90% -1.495e+01 9.540e+02 -0.016 0.9875
stenose90-99% -1.472e+01 9.540e+02 -0.015 0.9877
stenose100% (Occlusion) -3.915e-01 1.230e+03 0.000 0.9997
stenose50-99% -1.592e+01 9.540e+02 -0.017 0.9867
stenose70-99% -5.044e-01 1.190e+03 0.000 0.9997
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
(Dispersion parameter for binomial family taken to be 1)
Null deviance: 726.47 on 1035 degrees of freedom
Residual deviance: 684.24 on 1017 degrees of freedom
AIC: 722.24
Number of Fisher Scoring iterations: 15
Analyzing in dataset ' AEDB.CEA ' the association of ' MCP1_pg_ug_2015_rank ' with ' AsymptSympt ' ...
Collecting data...
We have collected the following and summarize it in an object:
Dataset...................: AEDB.CEA
Score/Exposure/biomarker..: MCP1_pg_ug_2015_rank
Trait/outcome.............: AsymptSympt
Effect size...............: 0.255797
Standard error............: 0.10185
Odds ratio (effect size)..: 1.291
Lower 95% CI..............: 1.058
Upper 95% CI..............: 1.577
Z-value...................: 2.511506
P-value...................: 0.01202173
Hosmer and Lemeshow r^2...: 0.058134
Cox and Snell r^2.........: 0.039945
Nagelkerke's pseudo r^2...: 0.079253
Sample size of AE DB......: 2421
Sample size of model......: 1036
Missing data %............: 57.20777
cat("Edit the column names...\n")
Edit the column names...
# Label the 16 result columns: identifiers, effect estimates with their
# confidence bounds, test statistics, pseudo-R^2 measures, and sample sizes.
colnames(GLM.results) <- c(
  "Dataset", "Predictor", "Trait",
  "Beta", "s.e.m.",
  "OR", "low95CI", "up95CI",
  "Z-value", "P-value",
  "r^2_l", "r^2_cs", "r^2_nagelkerke",
  "AE_N", "Model_N", "Perc_Miss"
)
cat("Correct the variable types...\n")
Correct the variable types...
# Convert every statistic column (all columns except Dataset, Predictor and
# Trait) to numeric. local() keeps the helper vector out of the workspace.
GLM.results <- local({
  stat.columns <- c(
    "Beta", "s.e.m.", "OR", "low95CI", "up95CI",
    "Z-value", "P-value",
    "r^2_l", "r^2_cs", "r^2_nagelkerke",
    "AE_N", "Model_N", "Perc_Miss"
  )
  GLM.results[stat.columns] <- lapply(GLM.results[stat.columns], as.numeric)
  GLM.results
})
# Save the data
cat("Writing results to Excel-file...\n")
Writing results to Excel-file...
### Multivariable (MODEL2): symptoms
# Export the results table to an Excel workbook in OUT_loc; the file name is
# prefixed with the Today datestamp.
write.xlsx(GLM.results,
file = paste0(OUT_loc, "/",Today,".AEDB.CEA.Bin.Multi.Protein.RANK.Symptoms.MODEL2.xlsx"),
row.names = FALSE, col.names = TRUE, sheetName = "Bin.Multi.Symptoms")
# Removing intermediates
cat("Removing intermediate files...\n")
Removing intermediate files...
rm(TRAIT, currentDF, GLM.results, GLM.results.TEMP, fit, model_step)
Next, we perform longitudinal analyses of the association between plaque and serum MCP1, IL6, and IL6R levels and secondary cardiovascular events over a three-year follow-up period.
The primary outcome is defined as “a composite of fatal or non-fatal myocardial infarction, fatal or non-fatal stroke, ruptured aortic aneurysm, fatal cardiac failure, coronary or peripheral interventions, leg amputation due to vascular causes, and cardiovascular death”, i.e. major adverse cardiovascular events (MACE). The corresponding variable, `epmajor.3years`, includes: myocardial infarction (MI), cerebral infarction (CVA/stroke), cardiovascular death (exact cause to be investigated), cerebral bleeding (CVA/stroke), fatal myocardial infarction (MI), fatal cerebral infarction, fatal cerebral bleeding, sudden death, fatal heart failure, fatal aneurysm rupture, and other cardiovascular death.
The secondary outcomes will be
epstroke.3years, these include:
epcoronary.3years, these include:
epcvdeath.3years, these include:
We will use 3-year follow-up, but we will also calculate 30 days and 90 days follow-up ‘time-to-event’ variables. On average there are 365.25 days in a year. We can calculate 30-days and 90-days follow-up time based on the three years follow-up.
# 30 and 90 days expressed as a fraction of a year (365.25 days per year).
cutt.off.30days <- (1/365.25) * 30
cutt.off.90days <- (1/365.25) * 90

# Cap the follow-up time at 30 and 90 days: values above the cut-off are
# replaced by the cut-off, all others are kept as they are.
# NOTE(review): assumes max.followup is recorded in years - confirm.
AEDB <- mutate(
  AEDB,
  FU.cutt.off.30days = ifelse(max.followup > cutt.off.30days,
                              cutt.off.30days, max.followup),
  FU.cutt.off.90days = ifelse(max.followup > cutt.off.90days,
                              cutt.off.90days, max.followup)
)
# Preview the follow-up variables of the first patients in AEDB.
AEDB.temp <- subset(AEDB, select = c("STUDY_NUMBER", "UPID", "Age", "Gender", "Hospital", "Artery_summary",
                                     "max.followup",
                                     "FU.cutt.off.3years",
                                     "FU.cutt.off.30days",
                                     "FU.cutt.off.90days"))

# library() stops immediately when 'labelled' is missing, whereas require()
# would only warn and let the to_factor() calls below fail.
library(labelled)
AEDB.temp$Gender <- to_factor(AEDB.temp$Gender)
AEDB.temp$Hospital <- to_factor(AEDB.temp$Hospital)
AEDB.temp$Artery_summary <- to_factor(AEDB.temp$Artery_summary)

# head() shows at most 10 rows and, unlike [1:10, ], does not pad the table
# with NA rows when fewer than 10 are available.
DT::datatable(head(AEDB.temp, 10), caption = "Excerpt of the whole AEDB.", rownames = FALSE)
rm(AEDB.temp)
# Cap the follow-up time at 30 and 90 days: values above the cut-off are
# replaced by the cut-off, all others are kept as they are.
# NOTE(review): assumes max.followup is recorded in years - confirm.
AEDB.CEA <- mutate(
  AEDB.CEA,
  FU.cutt.off.30days = ifelse(max.followup > cutt.off.30days,
                              cutt.off.30days, max.followup),
  FU.cutt.off.90days = ifelse(max.followup > cutt.off.90days,
                              cutt.off.90days, max.followup)
)
# Preview the follow-up variables of the first patients in AEDB.CEA.
AEDB.CEA.temp <- subset(AEDB.CEA, select = c("STUDY_NUMBER", "UPID", "Age", "Gender", "Hospital", "Artery_summary",
                                             "max.followup",
                                             "FU.cutt.off.3years",
                                             "FU.cutt.off.30days",
                                             "FU.cutt.off.90days"))

# library() stops immediately when 'labelled' is missing, whereas require()
# would only warn and let the to_factor() calls below fail.
library(labelled)
AEDB.CEA.temp$Gender <- to_factor(AEDB.CEA.temp$Gender)
AEDB.CEA.temp$Hospital <- to_factor(AEDB.CEA.temp$Hospital)
AEDB.CEA.temp$Artery_summary <- to_factor(AEDB.CEA.temp$Artery_summary)

# head() shows at most 10 rows and, unlike [1:10, ], does not pad the table
# with NA rows when fewer than 10 are available.
DT::datatable(head(AEDB.CEA.temp, 10), caption = "Excerpt of the whole AEDB.CEA.", rownames = FALSE)
rm(AEDB.CEA.temp)
Here we will calculate the new 30- and 90-days follow-up of the events and their event-times of interest:
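- major adverse cardiovascular events (`epmajor.3years`)
- stroke (`epstroke.3years`)
- coronary events (`epcoronary.3years`)
- cardiovascular death (`epcvdeath.3years`)

avg_days_in_year = 365.25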
# Cut-offs converted from a fraction of a year to days.
cutt.off.30days.scaled <- cutt.off.30days * avg_days_in_year
cutt.off.90days.scaled <- cutt.off.90days * avg_days_in_year

# Event times
# Each 3-year event time is multiplied by the number of days per year and then
# capped at the 30- or 90-day cut-off.
# NOTE(review): assumes the *_t_3years columns are recorded in years - confirm.
AEDB <- mutate(
  AEDB,
  ep_major_t_30days = ifelse(ep_major_t_3years * avg_days_in_year > cutt.off.30days.scaled,
                             cutt.off.30days.scaled, ep_major_t_3years * avg_days_in_year),
  ep_stroke_t_30days = ifelse(ep_stroke_t_3years * avg_days_in_year > cutt.off.30days.scaled,
                              cutt.off.30days.scaled, ep_stroke_t_3years * avg_days_in_year),
  ep_coronary_t_30days = ifelse(ep_coronary_t_3years * avg_days_in_year > cutt.off.30days.scaled,
                                cutt.off.30days.scaled, ep_coronary_t_3years * avg_days_in_year),
  ep_cvdeath_t_30days = ifelse(ep_cvdeath_t_3years * avg_days_in_year > cutt.off.30days.scaled,
                               cutt.off.30days.scaled, ep_cvdeath_t_3years * avg_days_in_year),
  ep_major_t_90days = ifelse(ep_major_t_3years * avg_days_in_year > cutt.off.90days.scaled,
                             cutt.off.90days.scaled, ep_major_t_3years * avg_days_in_year),
  ep_stroke_t_90days = ifelse(ep_stroke_t_3years * avg_days_in_year > cutt.off.90days.scaled,
                              cutt.off.90days.scaled, ep_stroke_t_3years * avg_days_in_year),
  ep_coronary_t_90days = ifelse(ep_coronary_t_3years * avg_days_in_year > cutt.off.90days.scaled,
                                cutt.off.90days.scaled, ep_coronary_t_3years * avg_days_in_year),
  ep_cvdeath_t_90days = ifelse(ep_cvdeath_t_3years * avg_days_in_year > cutt.off.90days.scaled,
                               cutt.off.90days.scaled, ep_cvdeath_t_3years * avg_days_in_year)
)
# Event times for AEDB.CEA: each 3-year event time is multiplied by the number
# of days per year and then capped at the 30- or 90-day cut-off.
# NOTE(review): assumes the *_t_3years columns are recorded in years - confirm.
AEDB.CEA <- mutate(
  AEDB.CEA,
  ep_major_t_30days = ifelse(ep_major_t_3years * avg_days_in_year > cutt.off.30days.scaled,
                             cutt.off.30days.scaled, ep_major_t_3years * avg_days_in_year),
  ep_stroke_t_30days = ifelse(ep_stroke_t_3years * avg_days_in_year > cutt.off.30days.scaled,
                              cutt.off.30days.scaled, ep_stroke_t_3years * avg_days_in_year),
  ep_coronary_t_30days = ifelse(ep_coronary_t_3years * avg_days_in_year > cutt.off.30days.scaled,
                                cutt.off.30days.scaled, ep_coronary_t_3years * avg_days_in_year),
  ep_cvdeath_t_30days = ifelse(ep_cvdeath_t_3years * avg_days_in_year > cutt.off.30days.scaled,
                               cutt.off.30days.scaled, ep_cvdeath_t_3years * avg_days_in_year),
  ep_major_t_90days = ifelse(ep_major_t_3years * avg_days_in_year > cutt.off.90days.scaled,
                             cutt.off.90days.scaled, ep_major_t_3years * avg_days_in_year),
  ep_stroke_t_90days = ifelse(ep_stroke_t_3years * avg_days_in_year > cutt.off.90days.scaled,
                              cutt.off.90days.scaled, ep_stroke_t_3years * avg_days_in_year),
  ep_coronary_t_90days = ifelse(ep_coronary_t_3years * avg_days_in_year > cutt.off.90days.scaled,
                                cutt.off.90days.scaled, ep_coronary_t_3years * avg_days_in_year),
  ep_cvdeath_t_90days = ifelse(ep_cvdeath_t_3years * avg_days_in_year > cutt.off.90days.scaled,
                               cutt.off.90days.scaled, ep_cvdeath_t_3years * avg_days_in_year)
)
# Derive the 30- and 90-day event indicators from the 3-year indicators: start
# from a copy of the 3-year indicator and reset it to 0 when the event time
# exceeds the cut-off. Columns are referenced explicitly through AEDB[[...]]
# instead of attach()/detach(), so that a same-named object elsewhere on the
# search path cannot silently mask a column.
event.cutoffs <- c("30days" = cutt.off.30days, "90days" = cutt.off.90days)
for (horizon in names(event.cutoffs)) {
  for (endpoint in c("major", "stroke", "coronary", "cvdeath")) {
    event.3years <- AEDB[[paste0("ep", endpoint, ".3years")]]
    time.3years <- AEDB[[paste0("ep_", endpoint, "_t_3years")]]
    event.column <- paste0("ep", endpoint, ".", horizon)
    AEDB[, event.column] <- event.3years
    # Rows where the condition evaluates to NA are left unchanged.
    AEDB[[event.column]][event.3years == 1 &
                           time.3years > event.cutoffs[[horizon]]] <- 0
  }
}
# Remove the loop helpers so the workspace matches the original script.
rm(event.cutoffs, horizon, endpoint, event.3years, time.3years, event.column)
# Preview the 3-year, 30-day and 90-day event indicators of the first patients
# in AEDB.
AEDB.temp <- subset(AEDB, select = c("STUDY_NUMBER", "UPID", "Age", "Gender", "Hospital", "Artery_summary",
                                     "epmajor.3years", "epstroke.3years", "epcoronary.3years", "epcvdeath.3years",
                                     "epmajor.30days", "epstroke.30days", "epcoronary.30days", "epcvdeath.30days",
                                     "epmajor.90days", "epstroke.90days", "epcoronary.90days", "epcvdeath.90days"))

# library() stops immediately when 'labelled' is missing, whereas require()
# would only warn and let the to_factor() calls below fail.
library(labelled)
AEDB.temp$Gender <- to_factor(AEDB.temp$Gender)
AEDB.temp$Hospital <- to_factor(AEDB.temp$Hospital)
AEDB.temp$Artery_summary <- to_factor(AEDB.temp$Artery_summary)

# head() shows at most 10 rows and, unlike [1:10, ], does not pad the table
# with NA rows when fewer than 10 are available.
DT::datatable(head(AEDB.temp, 10), caption = "Excerpt of the whole AEDB.", rownames = FALSE)
rm(AEDB.temp)
# Derive the 30- and 90-day event indicators from the 3-year indicators: start
# from a copy of the 3-year indicator and reset it to 0 when the event time
# exceeds the cut-off. Columns are referenced explicitly through
# AEDB.CEA[[...]] instead of attach()/detach(), so that a same-named object
# elsewhere on the search path cannot silently mask a column.
event.cutoffs <- c("30days" = cutt.off.30days, "90days" = cutt.off.90days)
for (horizon in names(event.cutoffs)) {
  for (endpoint in c("major", "stroke", "coronary", "cvdeath")) {
    event.3years <- AEDB.CEA[[paste0("ep", endpoint, ".3years")]]
    time.3years <- AEDB.CEA[[paste0("ep_", endpoint, "_t_3years")]]
    event.column <- paste0("ep", endpoint, ".", horizon)
    AEDB.CEA[, event.column] <- event.3years
    # Rows where the condition evaluates to NA are left unchanged.
    AEDB.CEA[[event.column]][event.3years == 1 &
                               time.3years > event.cutoffs[[horizon]]] <- 0
  }
}
# Remove the loop helpers so the workspace matches the original script.
rm(event.cutoffs, horizon, endpoint, event.3years, time.3years, event.column)
# Preview the 3-year, 30-day and 90-day event indicators of the first patients
# in AEDB.CEA.
AEDB.CEA.temp <- subset(AEDB.CEA, select = c("STUDY_NUMBER", "UPID", "Age", "Gender", "Hospital", "Artery_summary",
                                             "epmajor.3years", "epstroke.3years", "epcoronary.3years", "epcvdeath.3years",
                                             "epmajor.30days", "epstroke.30days", "epcoronary.30days", "epcvdeath.30days",
                                             "epmajor.90days", "epstroke.90days", "epcoronary.90days", "epcvdeath.90days"))

# library() stops immediately when 'labelled' is missing, whereas require()
# would only warn and let the to_factor() calls below fail.
library(labelled)
AEDB.CEA.temp$Gender <- to_factor(AEDB.CEA.temp$Gender)
AEDB.CEA.temp$Hospital <- to_factor(AEDB.CEA.temp$Hospital)
AEDB.CEA.temp$Artery_summary <- to_factor(AEDB.CEA.temp$Artery_summary)

# head() shows at most 10 rows and, unlike [1:10, ], does not pad the table
# with NA rows when fewer than 10 are available.
DT::datatable(head(AEDB.CEA.temp, 10), caption = "Excerpt of the whole AEDB.CEA.", rownames = FALSE)
rm(AEDB.CEA.temp)
First we do some sanity checks and inventory the time-to-event and event variables.
# Reference: https://bioconductor.org/packages/devel/bioc/vignettes/MultiAssayExperiment/inst/doc/QuickStartMultiAssay.html
# If you want to suppress warnings and messages when installing/loading packages
# suppressPackageStartupMessages({})
install.packages.auto("survival")
Loading required package: survival
install.packages.auto("survminer")
Loading required package: survminer
install.packages.auto("Hmisc")
Loading required package: Hmisc
Loading required package: lattice
Loading required package: Formula
Attaching package: ‘Hmisc’
The following objects are masked from ‘package:dplyr’:
src, summarize
The following objects are masked from ‘package:base’:
format.pval, units
cat("* Creating function to summarize Cox regression and prepare container for results.")
* Creating function to summarize Cox regression and prepare container for results.
# Function to get summary statistics from Cox regression model
#
# Collects the statistics of the FIRST coefficient of a fitted Cox model (the
# exposure of interest must therefore be the first term of the model formula)
# and prints them to the console.
#
# Arguments:
#   coxfit   fitted model whose summary() has `coefficients`, `n` and `nevent`
#            (e.g. a survival::coxph fit).
#   DATASET  name of the dataset, copied into the output.
#   OUTCOME  name of the outcome/endpoint, copied into the output.
#   protein  name of the exposure, copied into the output.
#
# Returns a vector of length 12:
#   DATASET, OUTCOME, protein, beta, SE, HR, lower 95% CI, upper 95% CI,
#   z-value, p-value, sample size, number of events.
# Because names and numbers are combined with c(), the vector is character.
# When the first coefficient could not be estimated, the nine statistics are NA
# but the vector keeps its length of 12 so it still fits a 12-column results row.
#
# NOTE(review): columns are read by position (coef, exp(coef), se(coef), z, p);
# a summary with an extra column (e.g. a robust SE) would shift them -- confirm
# that only plain coxph fits are passed in.
COX.STAT <- function(coxfit, DATASET, OUTCOME, protein){
  cat("Summarizing Cox regression results for '", protein ,"' and its association to '",OUTCOME,"' in '",DATASET,"'.\n")
  cox.sum <- summary(coxfit)
  cox.coef <- cox.sum$coefficients

  # A Cox model has no intercept, so a one-row coefficient table is a valid
  # single-covariate model. "Not fitted" means: no coefficient table at all, or
  # an NA estimate for the first coefficient.
  not.fitted <- is.null(cox.coef) || NROW(cox.coef) == 0 || is.na(cox.coef[1, 1])

  if (not.fitted) {
    output <- c(DATASET, OUTCOME, protein, rep(NA, 9))
    cat("Model not fitted; probably singular.\n")
  } else {
    cat("Collecting data.\n\n")
    cox.effectsize <- cox.coef[1, 1]
    cox.SE <- cox.coef[1, 3]
    cox.HReffect <- cox.coef[1, 2]
    # Wald 95% confidence interval on the hazard-ratio scale.
    cox.CI_low <- exp(cox.effectsize - 1.96 * cox.SE)
    cox.CI_up <- exp(cox.effectsize + 1.96 * cox.SE)
    cox.zvalue <- cox.coef[1, 4]
    cox.pvalue <- cox.coef[1, 5]
    cox.sample_size <- cox.sum$n
    cox.nevents <- cox.sum$nevent

    output <- c(DATASET, OUTCOME, protein, cox.effectsize, cox.SE, cox.HReffect, cox.CI_low, cox.CI_up, cox.zvalue, cox.pvalue, cox.sample_size, cox.nevents)
    cat("We have collected the following:\n")
    cat("Dataset used..............:", DATASET, "\n")
    cat("Outcome analyzed..........:", OUTCOME, "\n")
    cat("Protein...................:", protein, "\n")
    cat("Effect size...............:", round(cox.effectsize, 6), "\n")
    cat("Standard error............:", round(cox.SE, 6), "\n")
    cat("Hazard ratio (effect size):", round(cox.HReffect, 3), "\n")
    cat("Lower 95% CI..............:", round(cox.CI_low, 3), "\n")
    cat("Upper 95% CI..............:", round(cox.CI_up, 3), "\n")
    cat("Z-value...................:", round(cox.zvalue, 6), "\n")
    cat("P-value...................:", signif(cox.pvalue, 8), "\n")
    cat("Sample size in model......:", cox.sample_size, "\n")
    cat("Number of events..........:", cox.nevents, "\n")
  }
  output
}
# Column names for the 3-year follow-up: `times` holds the time-to-event
# columns and `endpoints` the event-indicator columns. The two vectors are in
# the same order, so element i of one belongs to element i of the other.
times <- c(
  "ep_major_t_3years", "ep_stroke_t_3years",
  "ep_coronary_t_3years", "ep_cvdeath_t_3years"
)
endpoints <- c(
  "epmajor.3years", "epstroke.3years",
  "epcoronary.3years", "epcvdeath.3years"
)
cat("* Check the cases per event type - for sanity.")
* Check the cases per event type - for sanity.
# Sanity check: print, per 3-year event indicator, how often each value occurs.
for (events in endpoints) {
  require(labelled)
  print(sprintf("Printing the summary of: %s", events))
  # print(summary(AEDB.CEA[,events]))
  print(table(AEDB.CEA[, events]))
}
[1] "Printing the summary of: epmajor.3years"
0 1
2033 265
[1] "Printing the summary of: epstroke.3years"
0 1
2169 130
[1] "Printing the summary of: epcoronary.3years"
0 1
2117 182
[1] "Printing the summary of: epcvdeath.3years"
0 1
2208 90
cat("* Check distribution of events over time - for sanity.")
* Check distribution of events over time - for sanity.
# Sanity check: print the summary() of every 3-year time-to-event column.
for (eventtimes in times) {
  print(sprintf("Printing the summary of: %s", eventtimes))
  print(summary(AEDB.CEA[, eventtimes]))
}
[1] "Printing the summary of: ep_major_t_3years"
ep_major_t_3years
Min. :0.000
1st Qu.:2.708
Median :3.000
Mean :2.573
3rd Qu.:3.000
Max. :3.000
NA's :125
[1] "Printing the summary of: ep_stroke_t_3years"
ep_stroke_t_3years
Min. :0.000
1st Qu.:2.877
Median :3.000
Mean :2.623
3rd Qu.:3.000
Max. :3.000
NA's :125
[1] "Printing the summary of: ep_coronary_t_3years"
ep_coronary_t_3years
Min. :0.000
1st Qu.:2.783
Median :3.000
Mean :2.622
3rd Qu.:3.000
Max. :3.000
NA's :125
[1] "Printing the summary of: ep_cvdeath_t_3years"
ep_cvdeath_t_3years
Min. :0.00274
1st Qu.:2.91233
Median :3.00000
Mean :2.70878
3rd Qu.:3.00000
Max. :3.00000
NA's :125
# Draw a 15-bin histogram of every 3-year time-to-event column, show it, and
# save it as a PDF in PLOT_loc.
for (eventtime in times) {
  print(paste0("Printing the distribution of: ", eventtime))
  p <- gghistogram(AEDB.CEA, x = eventtime, y = "..count..",
                   main = eventtime, bins = 15,
                   xlab = "year", color = uithof_color[16], fill = uithof_color[16], ggtheme = theme_minimal())
  print(p)
  # Save the plot object itself instead of whatever last_plot() currently
  # holds, and spell out `filename` rather than relying on partial matching
  # of `file`.
  ggsave(filename = paste0(PLOT_loc, "/",Today,".AEDB.CEA.EventDistributionPerYear.",eventtime,".pdf"), plot = p)
}
[1] "Printing the distribution of: ep_major_t_3years"
[1] "Printing the distribution of: ep_stroke_t_3years"
[1] "Printing the distribution of: ep_coronary_t_3years"
[1] "Printing the distribution of: ep_cvdeath_t_3years"
# Column names for the 30-day follow-up: `times30` holds the time-to-event
# columns and `endpoints30` the event-indicator columns, listed in the same
# order.
times30 <- c(
  "ep_major_t_30days", "ep_stroke_t_30days",
  "ep_coronary_t_30days", "ep_cvdeath_t_30days"
)
endpoints30 <- c(
  "epmajor.30days", "epstroke.30days",
  "epcoronary.30days", "epcvdeath.30days"
)
cat("* Check the cases per event type - for sanity.")
* Check the cases per event type - for sanity.
# Sanity check: print, per 30-day event indicator, how often each value occurs.
for (events in endpoints30) {
  print(sprintf("Printing the summary of: %s", events))
  # print(summary(AEDB.CEA[,events]))
  print(table(AEDB.CEA[, events]))
}
[1] "Printing the summary of: epmajor.30days"
0 1
2220 78
[1] "Printing the summary of: epstroke.30days"
0 1
2246 53
[1] "Printing the summary of: epcoronary.30days"
0 1
2265 34
[1] "Printing the summary of: epcvdeath.30days"
0 1
2286 12
cat("* Check distribution of events over time - for sanity.")
* Check distribution of events over time - for sanity.
# Sanity check: print the summary() of every 30-day time-to-event column.
for (eventtimes in times30) {
  print(sprintf("Printing the summary of: %s", eventtimes))
  print(summary(AEDB.CEA[, eventtimes]))
}
[1] "Printing the summary of: ep_major_t_30days"
ep_major_t_30days
Min. : 0.00
1st Qu.:30.00
Median :30.00
Mean :29.09
3rd Qu.:30.00
Max. :30.00
NA's :125
[1] "Printing the summary of: ep_stroke_t_30days"
ep_stroke_t_30days
Min. : 0.00
1st Qu.:30.00
Median :30.00
Mean :29.32
3rd Qu.:30.00
Max. :30.00
NA's :125
[1] "Printing the summary of: ep_coronary_t_30days"
ep_coronary_t_30days
Min. : 0.00
1st Qu.:30.00
Median :30.00
Mean :29.54
3rd Qu.:30.00
Max. :30.00
NA's :125
[1] "Printing the summary of: ep_cvdeath_t_30days"
ep_cvdeath_t_30days
Min. : 1.001
1st Qu.:30.000
Median :30.000
Mean :29.854
3rd Qu.:30.000
Max. :30.000
NA's :125
# Draw a 15-bin histogram of every 30-day time-to-event column, show it, and
# save it as a PDF in PLOT_loc.
for (eventtime in times30) {
  print(paste0("Printing the distribution of: ", eventtime))
  p <- gghistogram(AEDB.CEA, x = eventtime, y = "..count..",
                   main = eventtime, bins = 15,
                   xlab = "days", color = uithof_color[16], fill = uithof_color[16], ggtheme = theme_minimal())
  print(p)
  # Save the plot object itself instead of whatever last_plot() currently
  # holds, and spell out `filename` rather than relying on partial matching
  # of `file`.
  ggsave(filename = paste0(PLOT_loc, "/",Today,".AEDB.CEA.EventDistributionPer30Days.",eventtime,".pdf"), plot = p)
}
[1] "Printing the distribution of: ep_major_t_30days"
[1] "Printing the distribution of: ep_stroke_t_30days"
[1] "Printing the distribution of: ep_coronary_t_30days"
[1] "Printing the distribution of: ep_cvdeath_t_30days"
# Column names for the 90-day follow-up: `times90` holds the time-to-event
# columns and `endpoints90` the event-indicator columns, listed in the same
# order.
times90 <- c(
  "ep_major_t_90days", "ep_stroke_t_90days",
  "ep_coronary_t_90days", "ep_cvdeath_t_90days"
)
endpoints90 <- c(
  "epmajor.90days", "epstroke.90days",
  "epcoronary.90days", "epcvdeath.90days"
)
cat("* Check the cases per event type - for sanity.")
* Check the cases per event type - for sanity.
# Sanity check: print, per 90-day event indicator, how often each value occurs.
for (events in endpoints90) {
  print(sprintf("Printing the summary of: %s", events))
  # print(summary(AEDB.CEA[,events]))
  print(table(AEDB.CEA[, events]))
}
[1] "Printing the summary of: epmajor.90days"
0 1
2204 94
[1] "Printing the summary of: epstroke.90days"
0 1
2239 60
[1] "Printing the summary of: epcoronary.90days"
0 1
2255 44
[1] "Printing the summary of: epcvdeath.90days"
0 1
2279 19
cat("* Check distribution of events over time - for sanity.")
* Check distribution of events over time - for sanity.
# Sanity check: print the summary() of every 90-day time-to-event column.
for (eventtimes in times90) {
  print(sprintf("Printing the summary of: %s", eventtimes))
  print(summary(AEDB.CEA[, eventtimes]))
}
[1] "Printing the summary of: ep_major_t_90days"
ep_major_t_90days
Min. : 0.00
1st Qu.:90.00
Median :90.00
Mean :86.74
3rd Qu.:90.00
Max. :90.00
NA's :125
[1] "Printing the summary of: ep_stroke_t_90days"
ep_stroke_t_90days
Min. : 0.00
1st Qu.:90.00
Median :90.00
Mean :87.51
3rd Qu.:90.00
Max. :90.00
NA's :125
[1] "Printing the summary of: ep_coronary_t_90days"
ep_coronary_t_90days
Min. : 0.0
1st Qu.:90.0
Median :90.0
Mean :88.2
3rd Qu.:90.0
Max. :90.0
NA's :125
[1] "Printing the summary of: ep_cvdeath_t_90days"
ep_cvdeath_t_90days
Min. : 1.001
1st Qu.:90.000
Median :90.000
Mean :89.320
3rd Qu.:90.000
Max. :90.000
NA's :125
# Draw a 15-bin histogram of every 90-day time-to-event column, show it, and
# save it as a PDF in PLOT_loc.
for (eventtime in times90) {
  print(paste0("Printing the distribution of: ", eventtime))
  p <- gghistogram(AEDB.CEA, x = eventtime, y = "..count..",
                   main = eventtime, bins = 15,
                   xlab = "days", color = uithof_color[16], fill = uithof_color[16], ggtheme = theme_minimal())
  print(p)
  # Save the plot object itself instead of whatever last_plot() currently
  # holds, and spell out `filename` rather than relying on partial matching
  # of `file`.
  ggsave(filename = paste0(PLOT_loc, "/",Today,".AEDB.CEA.EventDistributionPer90Days.",eventtime,".pdf"), plot = p)
}
[1] "Printing the distribution of: ep_major_t_90days"
[1] "Printing the distribution of: ep_stroke_t_90days"
[1] "Printing the distribution of: ep_coronary_t_90days"
[1] "Printing the distribution of: ep_cvdeath_t_90days"
Let’s perform the actual Cox-regressions. We will apply a couple of models:
MODEL 1
# MODEL 1: Cox regression of every 3-year endpoint on each plaque protein
# (split into two groups), adjusted for Age and Gender.
#
# For every (time, endpoint) pair and every protein in TRAITS.PROTEIN.RANK:
#   1. split the protein into two quantile groups with cut2(g = 2);
#   2. draw a Kaplan-Meier plot of the two groups and copy it to a PDF;
#   3. fit the Cox model, plot the survival curves of the stratified model and
#      copy that plot to a PDF;
#   4. append one row of COX.STAT() output to COX.results.

# Set up a dataframe to receive results
COX.results <- data.frame(matrix(NA, ncol = 12, nrow = 0))

# Looping over each protein/endpoint/time combination.
# seq_along() is used instead of 1:length() so that an empty vector means zero
# iterations rather than the indices 1 and 0.
for (i in seq_along(times)) {
  eptime <- times[i]
  ep <- endpoints[i]
  cat(paste0("* Analyzing the effect of plaque proteins on [",ep,"].\n"))
  cat(" - creating temporary SE for this work.\n")
  # Fresh copy per endpoint: the protein columns are overwritten with their
  # two-group version further down.
  TEMP.DF <- as.data.frame(AEDB.CEA)
  cat(" - making a 'Surv' object and adding this to temporary dataframe.\n")
  TEMP.DF$event <- as.integer(TEMP.DF[,ep])
  # `y` is the response referred to by the survfit() formula below.
  TEMP.DF$y <- Surv(time = TEMP.DF[,eptime], event = TEMP.DF$event)

  cat(" - making strata of each of the plaque proteins and start survival analysis.\n")
  for (protein in seq_along(TRAITS.PROTEIN.RANK)) {
    cat(paste0(" > processing [",TRAITS.PROTEIN.RANK[protein],"]; ",protein," out of ",length(TRAITS.PROTEIN.RANK)," proteins.\n"))

    # splitting into two groups
    TEMP.DF[[ TRAITS.PROTEIN.RANK[protein] ]] <- cut2(TEMP.DF[,TRAITS.PROTEIN.RANK[protein]], g = 2)
    cat(paste0(" > cross tabulation of ",TRAITS.PROTEIN.RANK[protein],"-stratum.\n"))
    show(table(TEMP.DF[[ TRAITS.PROTEIN.RANK[protein] ]]))

    cat(paste0("\n > fitting the model for ",TRAITS.PROTEIN.RANK[protein],"-stratum.\n"))
    fit <- survfit(as.formula(paste0("y ~ ", TRAITS.PROTEIN.RANK[protein])), data = TEMP.DF)

    cat(paste0("\n > make a Kaplan-Meier-shizzle...\n"))
    # make Kaplan-Meier curve and save it
    show(ggsurvplot(fit, data = TEMP.DF,
                    palette = c("#DB003F", "#1290D9"),
                    # palete = c("F59D10", "#DB003F", "#49A01D", "#1290D9"),
                    linetype = c(1,2),
                    # linetype = c(1,2,3,4),
                    # conf.int = FALSE, conf.int.fill = "#595A5C", conf.int.alpha = 0.1,
                    pval = FALSE, pval.method = FALSE, pval.size = 4,
                    risk.table = TRUE, risk.table.y.text = FALSE, tables.y.text.col = TRUE, fontsize = 4,
                    censor = FALSE,
                    legend = "right",
                    legend.title = paste0("",TRAITS.PROTEIN.RANK[protein],""),
                    legend.labs = c("low", "high"),
                    title = paste0("Risk of ",ep,""), xlab = "Time [years]", font.main = c(16, "bold", "black")))
    # Copies the plot on the current graphics device to a PDF, so this call
    # must directly follow the plot it is meant to save.
    dev.copy2pdf(file = paste0(COX_loc,"/",
                               Today,".AEDB.CEA.survival.",ep,".2G.",
                               TRAITS.PROTEIN.RANK[protein],".pdf"), width = 12, height = 10, onefile = FALSE)

    cat(paste0("\n > perform the Cox-regression fashizzle and plot it...\n"))
    ### Do Cox-regression and plot it
    ### MODEL 1 (Simple model)
    # `cox` estimates the effect of the protein group (first model term, which
    # is the one COX.STAT() reads); `coxplot` uses the group as a stratum so
    # that survfit() gives one curve per group.
    cox <- coxph(Surv(TEMP.DF[,eptime], event) ~ TEMP.DF[[ TRAITS.PROTEIN.RANK[protein] ]]+Age+Gender, data = TEMP.DF)
    coxplot <- coxph(Surv(TEMP.DF[,eptime], event) ~ strata(TEMP.DF[[ TRAITS.PROTEIN.RANK[protein] ]])+Age+Gender, data = TEMP.DF)

    plot(survfit(coxplot), main = paste0("Cox proportional hazard of [",ep,"] per [",eptime,"]."),
         # ylim = c(0.2, 1), xlim = c(0,3), col = c("#595A5C", "#DB003F", "#1290D9"),
         ylim = c(0, 1), xlim = c(0,3), col = c("#DB003F", "#1290D9"),
         lty = c(1,2), lwd = 2,
         ylab = "Survival probability", xlab = "FU time [years]",
         mark.time = FALSE, axes = FALSE, bty = "n")
    legend("topright",
           c("low", "high"),
           title = paste0("",TRAITS.PROTEIN.RANK[protein],""),
           col = c("#DB003F", "#1290D9"),
           lty = c(1,2), lwd = 2,
           bty = "n")
    axis(side = 1, at = seq(0, 3, by = 1))
    axis(side = 2, at = seq(0, 1, by = 0.2))
    dev.copy2pdf(file = paste0(COX_loc,"/",
                               Today,".AEDB.CEA.Cox.",ep,".2G.",
                               # Today,".AEDB.CEA.Cox.",ep,".4G.",
                               TRAITS.PROTEIN.RANK[protein],".MODEL1.pdf"), height = 12, width = 10, onefile = TRUE)

    show(summary(cox))

    cat(paste0("\n > writing the Cox-regression fashizzle to Excel...\n"))
    # One 12-column row per protein/endpoint, appended to the results so far.
    COX.results.TEMP <- data.frame(matrix(NA, ncol = 12, nrow = 0))
    COX.results.TEMP[1,] <- COX.STAT(cox, "AEDB.CEA", ep, TRAITS.PROTEIN.RANK[protein])
    COX.results <- rbind(COX.results, COX.results.TEMP)
  }
}
* Analyzing the effect of plaque proteins on [epmajor.3years].
- creating temporary SE for this work.
- making a 'Surv' object and adding this to temporary dataframe.
- making strata of each of the plaque proteins and start survival analysis.
> processing [MCP1_pg_ug_2015_rank]; 1 out of 1 proteins.
> cross tabulation of MCP1_pg_ug_2015_rank-stratum.
[-3.34055,0.00105) [ 0.00105,3.34055]
598 598
> fitting the model for MCP1_pg_ug_2015_rank-stratum.
> make a Kaplan-Meier-shizzle...
Vectorized input to `element_text()` is not officially supported.
Results may be unexpected or may change in future versions of ggplot2.
> perform the Cox-regression fashizzle and plot it...
Call:
coxph(formula = Surv(TEMP.DF[, eptime], event) ~ TEMP.DF[[TRAITS.PROTEIN.RANK[protein]]] +
Age + Gender, data = TEMP.DF)
n= 1184, number of events= 139
(1237 observations deleted due to missingness)
coef exp(coef) se(coef) z Pr(>|z|)
TEMP.DF[[TRAITS.PROTEIN.RANK[protein]]][ 0.00105,3.34055] 0.037093 1.037789 0.169988 0.218 0.827268
Age 0.033469 1.034035 0.009876 3.389 0.000702 ***
Gendermale 0.336307 1.399769 0.199676 1.684 0.092131 .
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
exp(coef) exp(-coef) lower .95 upper .95
TEMP.DF[[TRAITS.PROTEIN.RANK[protein]]][ 0.00105,3.34055] 1.038 0.9636 0.7437 1.448
Age 1.034 0.9671 1.0142 1.054
Gendermale 1.400 0.7144 0.9464 2.070
Concordance= 0.588 (se = 0.025 )
Likelihood ratio test= 15.11 on 3 df, p=0.002
Wald test = 14.36 on 3 df, p=0.002
Score (logrank) test = 14.43 on 3 df, p=0.002
> writing the Cox-regression fashizzle to Excel...
Summarizing Cox regression results for ' MCP1_pg_ug_2015_rank ' and its association to ' epmajor.3years ' in ' AEDB.CEA '.
Collecting data.
We have collected the following:
Dataset used..............: AEDB.CEA
Outcome analyzed..........: epmajor.3years
Protein...................: MCP1_pg_ug_2015_rank
Effect size...............: 0.037093
Standard error............: 0.169988
Odds ratio (effect size)..: 1.038
Lower 95% CI..............: 0.744
Upper 95% CI..............: 1.448
T-value...................: 0.218206
P-value...................: 0.8272683
Sample size in model......: 1184
Number of events..........: 139
* Analyzing the effect of plaque proteins on [epstroke.3years].
- creating temporary SE for this work.
- making a 'Surv' object and adding this to temporary dataframe.
- making strata of each of the plaque proteins and start survival analysis.
> processing [MCP1_pg_ug_2015_rank]; 1 out of 1 proteins.
> cross tabulation of MCP1_pg_ug_2015_rank-stratum.
[-3.34055,0.00105) [ 0.00105,3.34055]
598 598
> fitting the model for MCP1_pg_ug_2015_rank-stratum.
> make a Kaplan-Meier-shizzle...
Vectorized input to `element_text()` is not officially supported.
Results may be unexpected or may change in future versions of ggplot2.
> perform the Cox-regression fashizzle and plot it...
Call:
coxph(formula = Surv(TEMP.DF[, eptime], event) ~ TEMP.DF[[TRAITS.PROTEIN.RANK[protein]]] +
Age + Gender, data = TEMP.DF)
n= 1184, number of events= 73
(1237 observations deleted due to missingness)
coef exp(coef) se(coef) z Pr(>|z|)
TEMP.DF[[TRAITS.PROTEIN.RANK[protein]]][ 0.00105,3.34055] 0.13646 1.14621 0.23508 0.580 0.5616
Age 0.03440 1.03500 0.01356 2.537 0.0112 *
Gendermale 0.06281 1.06483 0.25901 0.243 0.8084
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
exp(coef) exp(-coef) lower .95 upper .95
TEMP.DF[[TRAITS.PROTEIN.RANK[protein]]][ 0.00105,3.34055] 1.146 0.8724 0.7230 1.817
Age 1.035 0.9662 1.0079 1.063
Gendermale 1.065 0.9391 0.6409 1.769
Concordance= 0.597 (se = 0.033 )
Likelihood ratio test= 7.12 on 3 df, p=0.07
Wald test = 6.89 on 3 df, p=0.08
Score (logrank) test = 6.92 on 3 df, p=0.07
> writing the Cox-regression fashizzle to Excel...
Summarizing Cox regression results for ' MCP1_pg_ug_2015_rank ' and its association to ' epstroke.3years ' in ' AEDB.CEA '.
Collecting data.
We have collected the following:
Dataset used..............: AEDB.CEA
Outcome analyzed..........: epstroke.3years
Protein...................: MCP1_pg_ug_2015_rank
Effect size...............: 0.136457
Standard error............: 0.235075
Odds ratio (effect size)..: 1.146
Lower 95% CI..............: 0.723
Upper 95% CI..............: 1.817
T-value...................: 0.580483
P-value...................: 0.5615888
Sample size in model......: 1184
Number of events..........: 73
* Analyzing the effect of plaque proteins on [epcoronary.3years].
- creating temporary SE for this work.
- making a 'Surv' object and adding this to temporary dataframe.
- making strata of each of the plaque proteins and start survival analysis.
> processing [MCP1_pg_ug_2015_rank]; 1 out of 1 proteins.
> cross tabulation of MCP1_pg_ug_2015_rank-stratum.
[-3.34055,0.00105) [ 0.00105,3.34055]
598 598
> fitting the model for MCP1_pg_ug_2015_rank-stratum.
> make a Kaplan-Meier-shizzle...
Vectorized input to `element_text()` is not officially supported.
Results may be unexpected or may change in future versions of ggplot2.
> perform the Cox-regression fashizzle and plot it...
Call:
coxph(formula = Surv(TEMP.DF[, eptime], event) ~ TEMP.DF[[TRAITS.PROTEIN.RANK[protein]]] +
Age + Gender, data = TEMP.DF)
n= 1184, number of events= 91
(1237 observations deleted due to missingness)
coef exp(coef) se(coef) z Pr(>|z|)
TEMP.DF[[TRAITS.PROTEIN.RANK[protein]]][ 0.00105,3.34055] -0.174454 0.839916 0.210481 -0.829 0.4072
Age 0.007135 1.007160 0.011848 0.602 0.5471
Gendermale 0.666235 1.946893 0.269214 2.475 0.0133 *
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
exp(coef) exp(-coef) lower .95 upper .95
TEMP.DF[[TRAITS.PROTEIN.RANK[protein]]][ 0.00105,3.34055] 0.8399 1.1906 0.556 1.269
Age 1.0072 0.9929 0.984 1.031
Gendermale 1.9469 0.5136 1.149 3.300
Concordance= 0.577 (se = 0.03 )
Likelihood ratio test= 7.93 on 3 df, p=0.05
Wald test = 7.07 on 3 df, p=0.07
Score (logrank) test = 7.29 on 3 df, p=0.06
> writing the Cox-regression fashizzle to Excel...
Summarizing Cox regression results for ' MCP1_pg_ug_2015_rank ' and its association to ' epcoronary.3years ' in ' AEDB.CEA '.
Collecting data.
We have collected the following:
Dataset used..............: AEDB.CEA
Outcome analyzed..........: epcoronary.3years
Protein...................: MCP1_pg_ug_2015_rank
Effect size...............: -0.174454
Standard error............: 0.210481
Odds ratio (effect size)..: 0.84
Lower 95% CI..............: 0.556
Upper 95% CI..............: 1.269
T-value...................: -0.828833
P-value...................: 0.4071991
Sample size in model......: 1184
Number of events..........: 91
* Analyzing the effect of plaque proteins on [epcvdeath.3years].
- creating temporary SE for this work.
- making a 'Surv' object and adding this to temporary dataframe.
- making strata of each of the plaque proteins and start survival analysis.
> processing [MCP1_pg_ug_2015_rank]; 1 out of 1 proteins.
> cross tabulation of MCP1_pg_ug_2015_rank-stratum.
[-3.34055,0.00105) [ 0.00105,3.34055]
598 598
> fitting the model for MCP1_pg_ug_2015_rank-stratum.
> make a Kaplan-Meier-shizzle...
Vectorized input to `element_text()` is not officially supported.
Results may be unexpected or may change in future versions of ggplot2.
> perform the Cox-regression fashizzle and plot it...
Call:
coxph(formula = Surv(TEMP.DF[, eptime], event) ~ TEMP.DF[[TRAITS.PROTEIN.RANK[protein]]] +
Age + Gender, data = TEMP.DF)
n= 1184, number of events= 45
(1237 observations deleted due to missingness)
coef exp(coef) se(coef) z Pr(>|z|)
TEMP.DF[[TRAITS.PROTEIN.RANK[protein]]][ 0.00105,3.34055] -0.11090 0.89502 0.29877 -0.371 0.7105
Age 0.08444 1.08811 0.01930 4.375 1.21e-05 ***
Gendermale 0.89483 2.44691 0.41200 2.172 0.0299 *
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
exp(coef) exp(-coef) lower .95 upper .95
TEMP.DF[[TRAITS.PROTEIN.RANK[protein]]][ 0.00105,3.34055] 0.895 1.1173 0.4983 1.608
Age 1.088 0.9190 1.0477 1.130
Gendermale 2.447 0.4087 1.0913 5.487
Concordance= 0.71 (se = 0.036 )
Likelihood ratio test= 26.84 on 3 df, p=6e-06
Wald test = 23.16 on 3 df, p=4e-05
Score (logrank) test = 23.81 on 3 df, p=3e-05
> writing the Cox-regression fashizzle to Excel...
Summarizing Cox regression results for ' MCP1_pg_ug_2015_rank ' and its association to ' epcvdeath.3years ' in ' AEDB.CEA '.
Collecting data.
We have collected the following:
Dataset used..............: AEDB.CEA
Outcome analyzed..........: epcvdeath.3years
Protein...................: MCP1_pg_ug_2015_rank
Effect size...............: -0.110904
Standard error............: 0.298774
Odds ratio (effect size)..: 0.895
Lower 95% CI..............: 0.498
Upper 95% CI..............: 1.608
T-value...................: -0.371199
P-value...................: 0.7104893
Sample size in model......: 1184
Number of events..........: 45
cat("- Edit the column names...\n")
- Edit the column names...
# Label the 12 result columns in the order in which COX.STAT() returns them.
# NOTE(review): the third column is named "CpG", but COX.STAT() puts the
# protein name in that position -- confirm whether the name should change.
names(COX.results) <- c(
  "Dataset", "Outcome", "CpG",
  "Beta", "s.e.m.",
  "HR", "low95CI", "up95CI",
  "Z-value", "P-value", "SampleSize", "N_events"
)
cat("- Correct the variable types...\n")
- Correct the variable types...
# The statistics were stored together with text fields, so turn each of the
# nine statistic columns into a numeric column.
for (numeric.column in c("Beta", "s.e.m.", "HR", "low95CI", "up95CI",
                         "Z-value", "P-value", "SampleSize", "N_events")) {
  COX.results[[numeric.column]] <- as.numeric(COX.results[[numeric.column]])
}
rm(numeric.column)

# Keep the finished table under a dataset-specific name.
AEDB.CEA.COX.results <- COX.results

# Save the data
cat("- Writing results to Excel-file...\n")
- Writing results to Excel-file...
# Export the MODEL 1 results to an Excel workbook in OUT_loc: one sheet named
# "Results", a bold header row, no row names; an existing file is overwritten.
head.style <- createStyle(textDecoration = "BOLD")
write.xlsx(
  AEDB.CEA.COX.results,
  file = paste0(OUT_loc, "/",Today,".AEDB.CEA.Cox.2G.MODEL1.xlsx"),
  sheetName = "Results",
  creator = "Sander W. van der Laan",
  headerStyle = head.style,
  col.names = TRUE,
  row.names = FALSE,
  overwrite = TRUE
)
# Removing intermediates
cat("- Removing intermediate files...\n")
- Removing intermediate files...
#rm(TEMP.DF, protein, fit, cox, coxplot, COX.results, COX.results.TEMP, head.style, AEDB.CEA.COX.results)
#rm(head.style)
MODEL 2
# MODEL 2: Cox regression of every 3-year endpoint on each plaque protein
# (split into two groups), adjusted for the covariates listed at the model
# below.
#
# For every (time, endpoint) pair and every protein in TRAITS.PROTEIN.RANK:
#   1. split the protein into two quantile groups with cut2(g = 2);
#   2. draw a Kaplan-Meier plot of the two groups and copy it to a PDF;
#   3. fit the Cox model, plot the survival curves of the stratified model and
#      copy that plot to a PDF;
#   4. append one row of COX.STAT() output to COX.results.

# Set up a dataframe to receive results
COX.results <- data.frame(matrix(NA, ncol = 12, nrow = 0))

# Looping over each protein/endpoint/time combination.
# seq_along() is used instead of 1:length() so that an empty vector means zero
# iterations rather than the indices 1 and 0.
for (i in seq_along(times)) {
  eptime <- times[i]
  ep <- endpoints[i]
  cat(paste0("* Analyzing the effect of plaque proteins on [",ep,"].\n"))
  cat(" - creating temporary SE for this work.\n")
  # Fresh copy per endpoint: the protein columns are overwritten with their
  # two-group version further down.
  TEMP.DF <- as.data.frame(AEDB.CEA)
  cat(" - making a 'Surv' object and adding this to temporary dataframe.\n")
  TEMP.DF$event <- as.integer(TEMP.DF[,ep])
  #as.integer(TEMP.DF[,ep] == "Excluded")
  # `y` is the response referred to by the survfit() formula below.
  TEMP.DF$y <- Surv(time = TEMP.DF[,eptime], event = TEMP.DF$event)

  cat(" - making strata of each of the plaque proteins and start survival analysis.\n")
  for (protein in seq_along(TRAITS.PROTEIN.RANK)) {
    cat(paste0(" > processing [",TRAITS.PROTEIN.RANK[protein],"]; ",protein," out of ",length(TRAITS.PROTEIN.RANK)," proteins.\n"))

    # splitting into two groups
    TEMP.DF[[ TRAITS.PROTEIN.RANK[protein] ]] <- cut2(TEMP.DF[,TRAITS.PROTEIN.RANK[protein]], g = 2)
    cat(paste0(" > cross tabulation of ",TRAITS.PROTEIN.RANK[protein],"-stratum.\n"))
    show(table(TEMP.DF[[ TRAITS.PROTEIN.RANK[protein] ]]))

    cat(paste0("\n > fitting the model for ",TRAITS.PROTEIN.RANK[protein],"-stratum.\n"))
    fit <- survfit(as.formula(paste0("y ~ ", TRAITS.PROTEIN.RANK[protein])), data = TEMP.DF)

    cat(paste0("\n > make a Kaplan-Meier-shizzle...\n"))
    # make Kaplan-Meier curve and save it
    show(ggsurvplot(fit, data = TEMP.DF,
                    palette = c("#DB003F", "#1290D9"),
                    # palete = c("F59D10", "#DB003F", "#49A01D", "#1290D9"),
                    linetype = c(1,2),
                    # linetype = c(1,2,3,4),
                    # conf.int = FALSE, conf.int.fill = "#595A5C", conf.int.alpha = 0.1,
                    pval = FALSE, pval.method = FALSE, pval.size = 4,
                    risk.table = TRUE, risk.table.y.text = FALSE, tables.y.text.col = TRUE, fontsize = 4,
                    censor = FALSE,
                    legend = "right",
                    legend.title = paste0("",TRAITS.PROTEIN.RANK[protein],""),
                    legend.labs = c("low", "high"),
                    title = paste0("Risk of ",ep,""), xlab = "Time [years]", font.main = c(16, "bold", "black")))
    # Copies the plot on the current graphics device to a PDF, so this call
    # must directly follow the plot it is meant to save.
    # NOTE(review): this file name carries no model suffix, so it is the same
    # name the MODEL 1 loop uses for its Kaplan-Meier plot and replaces it.
    dev.copy2pdf(file = paste0(COX_loc,"/",
                               Today,".AEDB.CEA.survival.",ep,".2G.",
                               TRAITS.PROTEIN.RANK[protein],".pdf"), width = 12, height = 10, onefile = FALSE)

    cat(paste0("\n > perform the Cox-regression fashizzle and plot it...\n"))
    ### Do Cox-regression and plot it
    ### MODEL 2 adjusted for age, sex, hypertension, diabetes, smoking, lipid-lowering drugs, antiplatelet drugs, eGFR, BMI, history of CVD, level of stenosis
    # NOTE(review): the original description of MODEL 2 also listed LDL-C
    # levels, but no LDL-C term is part of the formulas below -- confirm
    # whether it should be added.
    # `cox` estimates the effect of the protein group (first model term, which
    # is the one COX.STAT() reads); `coxplot` uses the group as a stratum so
    # that survfit() gives one curve per group.
    cox <- coxph(Surv(TEMP.DF[,eptime], event) ~ TEMP.DF[[ TRAITS.PROTEIN.RANK[protein] ]]+Age + Gender + Hypertension.composite + DiabetesStatus + SmokerStatus + Med.Statin.LLD + Med.all.antiplatelet + GFR_MDRD + BMI + MedHx_CVD + stenose, data = TEMP.DF)
    coxplot <- coxph(Surv(TEMP.DF[,eptime], event) ~ strata(TEMP.DF[[ TRAITS.PROTEIN.RANK[protein] ]])+Age + Gender + Hypertension.composite + DiabetesStatus + SmokerStatus + Med.Statin.LLD + Med.all.antiplatelet + GFR_MDRD + BMI + MedHx_CVD + stenose, data = TEMP.DF)

    plot(survfit(coxplot), main = paste0("Cox proportional hazard of [",ep,"] per [",eptime,"]."),
         # ylim = c(0.2, 1), xlim = c(0,3), col = c("#595A5C", "#DB003F", "#1290D9"),
         ylim = c(0, 1), xlim = c(0,3), col = c("#DB003F", "#1290D9"),
         lty = c(1,2), lwd = 2,
         ylab = "Survival probability", xlab = "FU time [years]",
         mark.time = FALSE, axes = FALSE, bty = "n")
    legend("topright",
           c("low", "high"),
           title = paste0("",TRAITS.PROTEIN.RANK[protein],""),
           col = c("#DB003F", "#1290D9"),
           lty = c(1,2), lwd = 2,
           bty = "n")
    axis(side = 1, at = seq(0, 3, by = 1))
    axis(side = 2, at = seq(0, 1, by = 0.2))
    dev.copy2pdf(file = paste0(COX_loc,"/",
                               Today,".AEDB.CEA.Cox.",ep,".2G.",
                               # Today,".AEDB.CEA.Cox.",ep,".4G.",
                               TRAITS.PROTEIN.RANK[protein],".MODEL2.pdf"), height = 12, width = 10, onefile = TRUE)

    show(summary(cox))

    cat(paste0("\n > writing the Cox-regression fashizzle to Excel...\n"))
    # One 12-column row per protein/endpoint, appended to the results so far.
    COX.results.TEMP <- data.frame(matrix(NA, ncol = 12, nrow = 0))
    COX.results.TEMP[1,] <- COX.STAT(cox, "AEDB.CEA", ep, TRAITS.PROTEIN.RANK[protein])
    COX.results <- rbind(COX.results, COX.results.TEMP)
  }
}
* Analyzing the effect of plaque proteins on [epmajor.3years].
- creating temporary SE for this work.
- making a 'Surv' object and adding this to temporary dataframe.
- making strata of each of the plaque proteins and start survival analysis.
> processing [MCP1_pg_ug_2015_rank]; 1 out of 1 proteins.
> cross tabulation of MCP1_pg_ug_2015_rank-stratum.
[-3.34055,0.00105) [ 0.00105,3.34055]
598 598
> fitting the model for MCP1_pg_ug_2015_rank-stratum.
> make a Kaplan-Meier-shizzle...
Vectorized input to `element_text()` is not officially supported.
Results may be unexpected or may change in future versions of ggplot2.
> perform the Cox-regression fashizzle and plot it...
Call:
coxph(formula = Surv(TEMP.DF[, eptime], event) ~ TEMP.DF[[TRAITS.PROTEIN.RANK[protein]]] +
Age + Gender + Hypertension.composite + DiabetesStatus +
SmokerStatus + Med.Statin.LLD + Med.all.antiplatelet + GFR_MDRD +
BMI + MedHx_CVD + stenose, data = TEMP.DF)
n= 1027, number of events= 115
(1394 observations deleted due to missingness)
coef exp(coef) se(coef) z Pr(>|z|)
TEMP.DF[[TRAITS.PROTEIN.RANK[protein]]][ 0.00105,3.34055] 1.327e-01 1.142e+00 1.898e-01 0.699 0.4843
Age 3.278e-02 1.033e+00 1.275e-02 2.572 0.0101 *
Gendermale 3.790e-01 1.461e+00 2.280e-01 1.662 0.0964 .
Hypertension.compositeno -4.193e-01 6.575e-01 3.564e-01 -1.177 0.2393
Hypertension.compositeyes NA NA 0.000e+00 NA NA
DiabetesStatusDiabetes -1.633e-02 9.838e-01 2.234e-01 -0.073 0.9417
SmokerStatusEx-smoker -5.028e-01 6.049e-01 2.095e-01 -2.400 0.0164 *
SmokerStatusNever smoked -8.142e-01 4.430e-01 3.415e-01 -2.385 0.0171 *
Med.Statin.LLDno 2.579e-01 1.294e+00 2.155e-01 1.197 0.2314
Med.Statin.LLDyes NA NA 0.000e+00 NA NA
Med.all.antiplateletno 4.283e-01 1.535e+00 2.635e-01 1.626 0.1040
Med.all.antiplateletyes NA NA 0.000e+00 NA NA
GFR_MDRD -1.969e-02 9.805e-01 4.925e-03 -3.997 6.42e-05 ***
BMI 5.402e-02 1.056e+00 2.613e-02 2.067 0.0387 *
MedHx_CVDyes 5.306e-01 1.700e+00 2.220e-01 2.391 0.0168 *
stenose0-49% -1.560e+01 1.677e-07 2.462e+03 -0.006 0.9949
stenose50-70% -8.183e-01 4.412e-01 8.701e-01 -0.941 0.3470
stenose70-90% -2.441e-01 7.834e-01 7.298e-01 -0.335 0.7380
stenose90-99% -2.079e-01 8.123e-01 7.282e-01 -0.285 0.7753
stenose100% (Occlusion) -4.754e-02 9.536e-01 1.244e+00 -0.038 0.9695
stenoseNA NA NA 0.000e+00 NA NA
stenose50-99% -1.529e+01 2.289e-07 2.920e+03 -0.005 0.9958
stenose70-99% NA NA 0.000e+00 NA NA
stenose99 NA NA 0.000e+00 NA NA
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
exp(coef) exp(-coef) lower .95 upper .95
TEMP.DF[[TRAITS.PROTEIN.RANK[protein]]][ 0.00105,3.34055] 1.142e+00 8.757e-01 0.78722 1.6565
Age 1.033e+00 9.678e-01 1.00783 1.0595
Gendermale 1.461e+00 6.845e-01 0.93440 2.2840
Hypertension.compositeno 6.575e-01 1.521e+00 0.32699 1.3220
Hypertension.compositeyes NA NA NA NA
DiabetesStatusDiabetes 9.838e-01 1.016e+00 0.63498 1.5242
SmokerStatusEx-smoker 6.049e-01 1.653e+00 0.40120 0.9119
SmokerStatusNever smoked 4.430e-01 2.257e+00 0.22685 0.8650
Med.Statin.LLDno 1.294e+00 7.727e-01 0.84830 1.9746
Med.Statin.LLDyes NA NA NA NA
Med.all.antiplateletno 1.535e+00 6.516e-01 0.91566 2.5720
Med.all.antiplateletyes NA NA NA NA
GFR_MDRD 9.805e-01 1.020e+00 0.97109 0.9900
BMI 1.056e+00 9.474e-01 1.00281 1.1110
MedHx_CVDyes 1.700e+00 5.882e-01 1.10031 2.6266
stenose0-49% 1.677e-07 5.962e+06 0.00000 Inf
stenose50-70% 4.412e-01 2.267e+00 0.08017 2.4278
stenose70-90% 7.834e-01 1.276e+00 0.18741 3.2747
stenose90-99% 8.123e-01 1.231e+00 0.19494 3.3848
stenose100% (Occlusion) 9.536e-01 1.049e+00 0.08333 10.9119
stenoseNA NA NA NA NA
stenose50-99% 2.289e-07 4.369e+06 0.00000 Inf
stenose70-99% NA NA NA NA
stenose99 NA NA NA NA
Concordance= 0.697 (se = 0.023 )
Likelihood ratio test= 63.79 on 18 df, p=5e-07
Wald test = 58.83 on 18 df, p=3e-06
Score (logrank) test = 62.19 on 18 df, p=9e-07
> writing the Cox-regression fashizzle to Excel...
Summarizing Cox regression results for ' MCP1_pg_ug_2015_rank ' and its association to ' epmajor.3years ' in ' AEDB.CEA '.
Collecting data.
We have collected the following:
Dataset used..............: AEDB.CEA
Outcome analyzed..........: epmajor.3years
Protein...................: MCP1_pg_ug_2015_rank
Effect size...............: 0.132746
Standard error............: 0.189795
Odds ratio (effect size)..: 1.142
Lower 95% CI..............: 0.787
Upper 95% CI..............: 1.657
T-value...................: 0.699416
P-value...................: 0.4842918
Sample size in model......: 1027
Number of events..........: 115
* Analyzing the effect of plaque proteins on [epstroke.3years].
- creating temporary SE for this work.
- making a 'Surv' object and adding this to temporary dataframe.
- making strata of each of the plaque proteins and start survival analysis.
> processing [MCP1_pg_ug_2015_rank]; 1 out of 1 proteins.
> cross tabulation of MCP1_pg_ug_2015_rank-stratum.
[-3.34055,0.00105) [ 0.00105,3.34055]
598 598
> fitting the model for MCP1_pg_ug_2015_rank-stratum.
> make a Kaplan-Meier-shizzle...
Vectorized input to `element_text()` is not officially supported.
Results may be unexpected or may change in future versions of ggplot2.
> perform the Cox-regression fashizzle and plot it...
Call:
coxph(formula = Surv(TEMP.DF[, eptime], event) ~ TEMP.DF[[TRAITS.PROTEIN.RANK[protein]]] +
Age + Gender + Hypertension.composite + DiabetesStatus +
SmokerStatus + Med.Statin.LLD + Med.all.antiplatelet + GFR_MDRD +
BMI + MedHx_CVD + stenose, data = TEMP.DF)
n= 1027, number of events= 59
(1394 observations deleted due to missingness)
coef exp(coef) se(coef) z Pr(>|z|)
TEMP.DF[[TRAITS.PROTEIN.RANK[protein]]][ 0.00105,3.34055] 1.934e-01 1.213e+00 2.668e-01 0.725 0.4684
Age 4.267e-02 1.044e+00 1.756e-02 2.429 0.0151 *
Gendermale -4.383e-02 9.571e-01 2.992e-01 -0.146 0.8835
Hypertension.compositeno 8.567e-03 1.009e+00 4.178e-01 0.021 0.9836
Hypertension.compositeyes NA NA 0.000e+00 NA NA
DiabetesStatusDiabetes -2.315e-02 9.771e-01 3.168e-01 -0.073 0.9418
SmokerStatusEx-smoker -1.168e-01 8.898e-01 2.964e-01 -0.394 0.6936
SmokerStatusNever smoked -9.611e-01 3.825e-01 5.239e-01 -1.835 0.0666 .
Med.Statin.LLDno 3.743e-01 1.454e+00 2.926e-01 1.279 0.2008
Med.Statin.LLDyes NA NA 0.000e+00 NA NA
Med.all.antiplateletno 3.817e-01 1.465e+00 3.714e-01 1.028 0.3040
Med.all.antiplateletyes NA NA 0.000e+00 NA NA
GFR_MDRD -4.331e-03 9.957e-01 6.992e-03 -0.619 0.5357
BMI 8.171e-02 1.085e+00 3.459e-02 2.362 0.0182 *
MedHx_CVDyes 3.652e-01 1.441e+00 2.940e-01 1.242 0.2142
stenose0-49% -1.523e+01 2.419e-07 3.388e+03 -0.004 0.9964
stenose50-70% -5.088e-01 6.012e-01 1.160e+00 -0.438 0.6610
stenose70-90% -2.645e-01 7.676e-01 1.029e+00 -0.257 0.7971
stenose90-99% -2.597e-01 7.713e-01 1.029e+00 -0.252 0.8008
stenose100% (Occlusion) 5.974e-01 1.817e+00 1.439e+00 0.415 0.6780
stenoseNA NA NA 0.000e+00 NA NA
stenose50-99% -1.515e+01 2.629e-07 3.962e+03 -0.004 0.9969
stenose70-99% NA NA 0.000e+00 NA NA
stenose99 NA NA 0.000e+00 NA NA
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
exp(coef) exp(-coef) lower .95 upper .95
TEMP.DF[[TRAITS.PROTEIN.RANK[protein]]][ 0.00105,3.34055] 1.213e+00 8.242e-01 0.71934 2.047
Age 1.044e+00 9.582e-01 1.00828 1.080
Gendermale 9.571e-01 1.045e+00 0.53242 1.721
Hypertension.compositeno 1.009e+00 9.915e-01 0.44469 2.288
Hypertension.compositeyes NA NA NA NA
DiabetesStatusDiabetes 9.771e-01 1.023e+00 0.52513 1.818
SmokerStatusEx-smoker 8.898e-01 1.124e+00 0.49772 1.591
SmokerStatusNever smoked 3.825e-01 2.615e+00 0.13699 1.068
Med.Statin.LLDno 1.454e+00 6.878e-01 0.81941 2.580
Med.Statin.LLDyes NA NA NA NA
Med.all.antiplateletno 1.465e+00 6.827e-01 0.70740 3.033
Med.all.antiplateletyes NA NA NA NA
GFR_MDRD 9.957e-01 1.004e+00 0.98213 1.009
BMI 1.085e+00 9.215e-01 1.01402 1.161
MedHx_CVDyes 1.441e+00 6.940e-01 0.80970 2.564
stenose0-49% 2.419e-07 4.133e+06 0.00000 Inf
stenose50-70% 6.012e-01 1.663e+00 0.06185 5.844
stenose70-90% 7.676e-01 1.303e+00 0.10217 5.767
stenose90-99% 7.713e-01 1.297e+00 0.10256 5.800
stenose100% (Occlusion) 1.817e+00 5.503e-01 0.10830 30.495
stenoseNA NA NA NA NA
stenose50-99% 2.629e-07 3.804e+06 0.00000 Inf
stenose70-99% NA NA NA NA
stenose99 NA NA NA NA
Concordance= 0.67 (se = 0.034 )
Likelihood ratio test= 23.02 on 18 df, p=0.2
Wald test = 20.97 on 18 df, p=0.3
Score (logrank) test = 22.15 on 18 df, p=0.2
> writing the Cox-regression fashizzle to Excel...
Summarizing Cox regression results for ' MCP1_pg_ug_2015_rank ' and its association to ' epstroke.3years ' in ' AEDB.CEA '.
Collecting data.
We have collected the following:
Dataset used..............: AEDB.CEA
Outcome analyzed..........: epstroke.3years
Protein...................: MCP1_pg_ug_2015_rank
Effect size...............: 0.1934
Standard error............: 0.26675
Odds ratio (effect size)..: 1.213
Lower 95% CI..............: 0.719
Upper 95% CI..............: 2.047
T-value...................: 0.725021
P-value...................: 0.4684393
Sample size in model......: 1027
Number of events..........: 59
* Analyzing the effect of plaque proteins on [epcoronary.3years].
- creating temporary SE for this work.
- making a 'Surv' object and adding this to temporary dataframe.
- making strata of each of the plaque proteins and start survival analysis.
> processing [MCP1_pg_ug_2015_rank]; 1 out of 1 proteins.
> cross tabulation of MCP1_pg_ug_2015_rank-stratum.
[-3.34055,0.00105) [ 0.00105,3.34055]
598 598
> fitting the model for MCP1_pg_ug_2015_rank-stratum.
> make a Kaplan-Meier-shizzle...
Vectorized input to `element_text()` is not officially supported.
Results may be unexpected or may change in future versions of ggplot2.
> perform the Cox-regression fashizzle and plot it...
Call:
coxph(formula = Surv(TEMP.DF[, eptime], event) ~ TEMP.DF[[TRAITS.PROTEIN.RANK[protein]]] +
Age + Gender + Hypertension.composite + DiabetesStatus +
SmokerStatus + Med.Statin.LLD + Med.all.antiplatelet + GFR_MDRD +
BMI + MedHx_CVD + stenose, data = TEMP.DF)
n= 1027, number of events= 78
(1394 observations deleted due to missingness)
coef exp(coef) se(coef) z Pr(>|z|)
TEMP.DF[[TRAITS.PROTEIN.RANK[protein]]][ 0.00105,3.34055] -1.290e-01 8.790e-01 2.315e-01 -0.557 0.577222
Age -8.243e-04 9.992e-01 1.506e-02 -0.055 0.956352
Gendermale 8.625e-01 2.369e+00 3.029e-01 2.848 0.004404 **
Hypertension.compositeno -9.192e-01 3.989e-01 5.209e-01 -1.765 0.077619 .
Hypertension.compositeyes NA NA 0.000e+00 NA NA
DiabetesStatusDiabetes -1.045e-01 9.008e-01 2.759e-01 -0.379 0.704879
SmokerStatusEx-smoker -6.291e-01 5.331e-01 2.573e-01 -2.445 0.014496 *
SmokerStatusNever smoked -2.735e-01 7.607e-01 3.647e-01 -0.750 0.453241
Med.Statin.LLDno 1.147e-01 1.122e+00 2.730e-01 0.420 0.674465
Med.Statin.LLDyes NA NA 0.000e+00 NA NA
Med.all.antiplateletno 3.664e-01 1.443e+00 3.348e-01 1.095 0.273710
Med.all.antiplateletyes NA NA 0.000e+00 NA NA
GFR_MDRD -2.059e-02 9.796e-01 5.940e-03 -3.466 0.000528 ***
BMI 1.350e-02 1.014e+00 3.290e-02 0.410 0.681500
MedHx_CVDyes 6.867e-01 1.987e+00 2.795e-01 2.457 0.014000 *
stenose0-49% -1.584e+01 1.320e-07 3.048e+03 -0.005 0.995853
stenose50-70% -1.713e+00 1.803e-01 1.418e+00 -1.208 0.227098
stenose70-90% -1.372e-01 8.718e-01 1.022e+00 -0.134 0.893141
stenose90-99% -1.918e-01 8.255e-01 1.022e+00 -0.188 0.851135
stenose100% (Occlusion) -1.540e+01 2.059e-07 2.452e+03 -0.006 0.994990
stenoseNA NA NA 0.000e+00 NA NA
stenose50-99% 8.142e-01 2.257e+00 1.429e+00 0.570 0.568977
stenose70-99% NA NA 0.000e+00 NA NA
stenose99 NA NA 0.000e+00 NA NA
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
exp(coef) exp(-coef) lower .95 upper .95
TEMP.DF[[TRAITS.PROTEIN.RANK[protein]]][ 0.00105,3.34055] 8.790e-01 1.138e+00 0.55840 1.3835
Age 9.992e-01 1.001e+00 0.97011 1.0291
Gendermale 2.369e+00 4.221e-01 1.30849 4.2894
Hypertension.compositeno 3.989e-01 2.507e+00 0.14370 1.1071
Hypertension.compositeyes NA NA NA NA
DiabetesStatusDiabetes 9.008e-01 1.110e+00 0.52459 1.5468
SmokerStatusEx-smoker 5.331e-01 1.876e+00 0.32192 0.8827
SmokerStatusNever smoked 7.607e-01 1.315e+00 0.37224 1.5546
Med.Statin.LLDno 1.122e+00 8.917e-01 0.65677 1.9151
Med.Statin.LLDyes NA NA NA NA
Med.all.antiplateletno 1.443e+00 6.932e-01 0.74849 2.7802
Med.all.antiplateletyes NA NA NA NA
GFR_MDRD 9.796e-01 1.021e+00 0.96828 0.9911
BMI 1.014e+00 9.866e-01 0.95030 1.0811
MedHx_CVDyes 1.987e+00 5.032e-01 1.14909 3.4365
stenose0-49% 1.320e-07 7.576e+06 0.00000 Inf
stenose50-70% 1.803e-01 5.547e+00 0.01118 2.9061
stenose70-90% 8.718e-01 1.147e+00 0.11772 6.4559
stenose90-99% 8.255e-01 1.211e+00 0.11137 6.1183
stenose100% (Occlusion) 2.059e-07 4.857e+06 0.00000 Inf
stenoseNA NA NA NA NA
stenose50-99% 2.257e+00 4.430e-01 0.13703 37.1835
stenose70-99% NA NA NA NA
stenose99 NA NA NA NA
Concordance= 0.726 (se = 0.028 )
Likelihood ratio test= 49.9 on 18 df, p=8e-05
Wald test = 43.46 on 18 df, p=7e-04
Score (logrank) test = 47.19 on 18 df, p=2e-04
> writing the Cox-regression fashizzle to Excel...
Summarizing Cox regression results for ' MCP1_pg_ug_2015_rank ' and its association to ' epcoronary.3years ' in ' AEDB.CEA '.
Collecting data.
We have collected the following:
Dataset used..............: AEDB.CEA
Outcome analyzed..........: epcoronary.3years
Protein...................: MCP1_pg_ug_2015_rank
Effect size...............: -0.129025
Standard error............: 0.231458
Odds ratio (effect size)..: 0.879
Lower 95% CI..............: 0.558
Upper 95% CI..............: 1.384
T-value...................: -0.557447
P-value...................: 0.577222
Sample size in model......: 1027
Number of events..........: 78
* Analyzing the effect of plaque proteins on [epcvdeath.3years].
- creating temporary SE for this work.
- making a 'Surv' object and adding this to temporary dataframe.
- making strata of each of the plaque proteins and start survival analysis.
> processing [MCP1_pg_ug_2015_rank]; 1 out of 1 proteins.
> cross tabulation of MCP1_pg_ug_2015_rank-stratum.
[-3.34055,0.00105) [ 0.00105,3.34055]
598 598
> fitting the model for MCP1_pg_ug_2015_rank-stratum.
> make a Kaplan-Meier-shizzle...
Vectorized input to `element_text()` is not officially supported.
Results may be unexpected or may change in future versions of ggplot2.
> perform the Cox-regression fashizzle and plot it...
Call:
coxph(formula = Surv(TEMP.DF[, eptime], event) ~ TEMP.DF[[TRAITS.PROTEIN.RANK[protein]]] +
Age + Gender + Hypertension.composite + DiabetesStatus +
SmokerStatus + Med.Statin.LLD + Med.all.antiplatelet + GFR_MDRD +
BMI + MedHx_CVD + stenose, data = TEMP.DF)
n= 1027, number of events= 33
(1394 observations deleted due to missingness)
coef exp(coef) se(coef) z Pr(>|z|)
TEMP.DF[[TRAITS.PROTEIN.RANK[protein]]][ 0.00105,3.34055] -2.044e-01 8.152e-01 3.583e-01 -0.570 0.56836
Age 6.640e-02 1.069e+00 2.655e-02 2.501 0.01238 *
Gendermale 1.258e+00 3.520e+00 5.565e-01 2.261 0.02373 *
Hypertension.compositeno -1.774e+01 1.978e-08 4.017e+03 -0.004 0.99648
Hypertension.compositeyes NA NA 0.000e+00 NA NA
DiabetesStatusDiabetes -3.835e-02 9.624e-01 4.261e-01 -0.090 0.92829
SmokerStatusEx-smoker -5.672e-01 5.671e-01 4.021e-01 -1.411 0.15834
SmokerStatusNever smoked -4.302e-01 6.504e-01 6.158e-01 -0.699 0.48479
Med.Statin.LLDno 8.681e-02 1.091e+00 4.156e-01 0.209 0.83454
Med.Statin.LLDyes NA NA 0.000e+00 NA NA
Med.all.antiplateletno 1.130e+00 3.096e+00 4.161e-01 2.716 0.00661 **
Med.all.antiplateletyes NA NA 0.000e+00 NA NA
GFR_MDRD -3.395e-02 9.666e-01 9.369e-03 -3.624 0.00029 ***
BMI 8.410e-02 1.088e+00 5.118e-02 1.643 0.10031
MedHx_CVDyes 7.683e-01 2.156e+00 4.605e-01 1.668 0.09528 .
stenose0-49% -2.031e+01 1.509e-09 2.713e+04 -0.001 0.99940
stenose50-70% -1.041e+00 3.531e-01 1.236e+00 -0.842 0.39970
stenose70-90% -1.480e+00 2.276e-01 1.069e+00 -1.385 0.16609
stenose90-99% -1.069e+00 3.434e-01 1.054e+00 -1.014 0.31055
stenose100% (Occlusion) -1.957e+01 3.154e-09 1.974e+04 -0.001 0.99921
stenoseNA NA NA 0.000e+00 NA NA
stenose50-99% -1.939e+01 3.802e-09 3.407e+04 -0.001 0.99955
stenose70-99% NA NA 0.000e+00 NA NA
stenose99 NA NA 0.000e+00 NA NA
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
exp(coef) exp(-coef) lower .95 upper .95
TEMP.DF[[TRAITS.PROTEIN.RANK[protein]]][ 0.00105,3.34055] 8.152e-01 1.227e+00 0.40392 1.6451
Age 1.069e+00 9.358e-01 1.01447 1.1257
Gendermale 3.520e+00 2.841e-01 1.18264 10.4751
Hypertension.compositeno 1.978e-08 5.054e+07 0.00000 Inf
Hypertension.compositeyes NA NA NA NA
DiabetesStatusDiabetes 9.624e-01 1.039e+00 0.41748 2.2185
SmokerStatusEx-smoker 5.671e-01 1.763e+00 0.25789 1.2471
SmokerStatusNever smoked 6.504e-01 1.538e+00 0.19453 2.1744
Med.Statin.LLDno 1.091e+00 9.168e-01 0.48298 2.4631
Med.Statin.LLDyes NA NA NA NA
Med.all.antiplateletno 3.096e+00 3.230e-01 1.36967 6.9988
Med.all.antiplateletyes NA NA NA NA
GFR_MDRD 9.666e-01 1.035e+00 0.94903 0.9845
BMI 1.088e+00 9.193e-01 0.98393 1.2025
MedHx_CVDyes 2.156e+00 4.638e-01 0.87427 5.3170
stenose0-49% 1.509e-09 6.625e+08 0.00000 Inf
stenose50-70% 3.531e-01 2.832e+00 0.03132 3.9813
stenose70-90% 2.276e-01 4.394e+00 0.02802 1.8490
stenose90-99% 3.434e-01 2.912e+00 0.04351 2.7102
stenose100% (Occlusion) 3.154e-09 3.170e+08 0.00000 Inf
stenoseNA NA NA NA NA
stenose50-99% 3.802e-09 2.630e+08 0.00000 Inf
stenose70-99% NA NA NA NA
stenose99 NA NA NA NA
Concordance= 0.839 (se = 0.031 )
Likelihood ratio test= 60.15 on 18 df, p=2e-06
Wald test = 20.83 on 18 df, p=0.3
Score (logrank) test = 56.03 on 18 df, p=9e-06
> writing the Cox-regression fashizzle to Excel...
Summarizing Cox regression results for ' MCP1_pg_ug_2015_rank ' and its association to ' epcvdeath.3years ' in ' AEDB.CEA '.
Collecting data.
We have collected the following:
Dataset used..............: AEDB.CEA
Outcome analyzed..........: epcvdeath.3years
Protein...................: MCP1_pg_ug_2015_rank
Effect size...............: -0.204374
Standard error............: 0.358254
Odds ratio (effect size)..: 0.815
Lower 95% CI..............: 0.404
Upper 95% CI..............: 1.645
T-value...................: -0.570471
P-value...................: 0.568358
Sample size in model......: 1027
Number of events..........: 33
cat("- Edit the column names...\n")
- Edit the column names...
# Label the 12 result columns: three identifier columns followed by the
# nine statistics that are converted to numeric further down.
# NOTE(review): the third column is called "CpG" but appears to hold the
# protein name -- confirm before renaming, downstream readers may rely on it.
names(COX.results) <- c(
  "Dataset", "Outcome", "CpG",
  "Beta", "s.e.m.",
  "HR", "low95CI", "up95CI",
  "Z-value", "P-value",
  "SampleSize", "N_events"
)
cat("- Correct the variable types...\n")
- Correct the variable types...
# Coerce every statistic column to numeric in one pass; the identifier
# columns (Dataset, Outcome, CpG) are left untouched. local() keeps the
# helper vector out of the global environment.
COX.results <- local({
  stat_cols <- c("Beta", "s.e.m.", "HR", "low95CI", "up95CI",
                 "Z-value", "P-value", "SampleSize", "N_events")
  converted <- COX.results
  converted[stat_cols] <- lapply(converted[stat_cols], as.numeric)
  converted
})
# Keep a dataset-specific copy of the finished table for the export below.
AEDB.CEA.COX.results <- COX.results
# Save the data
cat("- Writing results to Excel-file...\n")
- Writing results to Excel-file...
# Bold header row for the exported sheet.
head.style <- createStyle(textDecoration = "BOLD")
# Export the MODEL 2 Cox results as a single "Results" sheet; an existing
# file with the same date-stamped name is overwritten.
write.xlsx(
  AEDB.CEA.COX.results,
  file = paste0(OUT_loc, "/",Today,".AEDB.CEA.Cox.2G.MODEL2.xlsx"),
  sheetName = "Results",
  headerStyle = head.style,
  creator = "Sander W. van der Laan",
  col.names = TRUE,
  row.names = FALSE,
  overwrite = TRUE
)
# Removing intermediates
cat("- Removing intermediate files...\n")
- Removing intermediate files...
# Drop all intermediates of this analysis in a single call. head.style is
# part of this list, so a second rm(head.style) would only raise
# "object 'head.style' not found".
rm(TEMP.DF, protein, fit, cox, coxplot, COX.results, COX.results.TEMP, head.style, AEDB.CEA.COX.results)
object 'head.style' not found
MODEL 1
# MODEL 1, 30-day endpoints: Kaplan-Meier curves and age/sex-adjusted Cox
# regressions of every endpoint on every plaque protein split in two groups.
#
# Uses objects created earlier in the script: AEDB.CEA, times30, endpoints30,
# TRAITS.PROTEIN.RANK, COX_loc, OUT_loc, Today and COX.STAT().
# Writes one survival PDF and one Cox PDF per endpoint/protein pair to
# COX_loc and one Excel file with a result row per pair to OUT_loc.

# Set up a dataframe to receive results (12 columns, named after the loop)
COX.results <- data.frame(matrix(NA, ncol = 12, nrow = 0))

# Looping over each protein/endpoint/time combination.
# seq_along() rather than 1:length(): nothing runs when the vector is empty.
for (i in seq_along(times30)) {
  eptime <- times30[i]
  ep <- endpoints30[i]
  cat(paste0("* Analyzing the effect of plaque proteins on [",ep,"].\n"))
  cat(" - creating temporary SE for this work.\n")
  # Fresh copy per endpoint, because the protein columns are overwritten below.
  TEMP.DF <- as.data.frame(AEDB.CEA)
  cat(" - making a 'Surv' object and adding this to temporary dataframe.\n")
  TEMP.DF$event <- as.integer(TEMP.DF[,ep])
  TEMP.DF$y <- Surv(time = TEMP.DF[,eptime], event = TEMP.DF$event)
  cat(" - making strata of each of the plaque proteins and start survival analysis.\n")

  for (protein in seq_along(TRAITS.PROTEIN.RANK)) {
    cat(paste0(" > processing [",TRAITS.PROTEIN.RANK[protein],"]; ",protein," out of ",length(TRAITS.PROTEIN.RANK)," proteins.\n"))
    # splitting into two groups (the protein column is replaced by its stratum)
    TEMP.DF[[ TRAITS.PROTEIN.RANK[protein] ]] <- cut2(TEMP.DF[,TRAITS.PROTEIN.RANK[protein]], g = 2)
    cat(paste0(" > cross tabulation of ",TRAITS.PROTEIN.RANK[protein],"-stratum.\n"))
    show(table(TEMP.DF[[ TRAITS.PROTEIN.RANK[protein] ]]))

    cat(paste0("\n > fitting the model for ",TRAITS.PROTEIN.RANK[protein],"-stratum.\n"))
    fit <- survfit(as.formula(paste0("y ~ ", TRAITS.PROTEIN.RANK[protein])), data = TEMP.DF)

    cat(paste0("\n > make a Kaplan-Meier-shizzle...\n"))
    # make Kaplan-Meier curve and save it
    # (for four groups, extend palette and linetype to four entries each)
    show(ggsurvplot(fit, data = TEMP.DF,
                    palette = c("#DB003F", "#1290D9"),
                    linetype = c(1,2),
                    ylim = c(0.75, 1),
                    # conf.int = FALSE, conf.int.fill = "#595A5C", conf.int.alpha = 0.1,
                    pval = FALSE, pval.method = FALSE, pval.size = 4,
                    risk.table = TRUE, risk.table.y.text = FALSE, tables.y.text.col = TRUE, fontsize = 4,
                    censor = FALSE,
                    legend = "right",
                    legend.title = paste0("",TRAITS.PROTEIN.RANK[protein],""),
                    legend.labs = c("low", "high"),
                    title = paste0("Risk of ",ep,""), xlab = "Time [days]", font.main = c(16, "bold", "black")))
    dev.copy2pdf(file = paste0(COX_loc,"/",
                               Today,".AEDB.CEA.survival.",ep,".2G.",
                               TRAITS.PROTEIN.RANK[protein],".30days.pdf"), width = 12, height = 10, onefile = FALSE)

    cat(paste0("\n > perform the Cox-regression fashizzle and plot it...\n"))
    ### Do Cox-regression and plot it
    ### MODEL 1 (Simple model)
    # 'cox' estimates the effect of the protein stratum; 'coxplot' stratifies
    # on it instead so that survfit() yields one curve per stratum.
    cox <- coxph(Surv(TEMP.DF[,eptime], event) ~ TEMP.DF[[ TRAITS.PROTEIN.RANK[protein] ]]+Age+Gender, data = TEMP.DF)
    coxplot <- coxph(Surv(TEMP.DF[,eptime], event) ~ strata(TEMP.DF[[ TRAITS.PROTEIN.RANK[protein] ]])+Age+Gender, data = TEMP.DF)

    # The curve colours must be the same vector the legend uses. A longer
    # vector starting with grey would draw "low" in grey and "high" in red
    # while the legend shows red and blue.
    # NOTE(review): the x-axis is limited to 0-3 although it is labelled in
    # days; confirm this matches the units of the times30 columns.
    plot(survfit(coxplot), main = paste0("Cox proportional hazard of [",ep,"] per [",eptime,"]."),
         ylim = c(0.75, 1), xlim = c(0,3), col = c("#DB003F", "#1290D9"),
         lty = c(1,2), lwd = 2,
         ylab = "Survival probability", xlab = "FU time [days]",
         mark.time = FALSE, axes = FALSE, bty = "n")
    legend("topright",
           c("low", "high"),
           title = paste0("",TRAITS.PROTEIN.RANK[protein],""),
           col = c("#DB003F", "#1290D9"),
           lty = c(1,2), lwd = 2,
           bty = "n")
    axis(side = 1, at = seq(0, 3, by = 1))
    axis(side = 2, at = seq(0, 1, by = 0.2))
    dev.copy2pdf(file = paste0(COX_loc,"/",
                               Today,".AEDB.CEA.Cox.",ep,".2G.",
                               TRAITS.PROTEIN.RANK[protein],".MODEL1.30days.pdf"), height = 12, width = 10, onefile = TRUE)

    show(summary(cox))

    cat(paste0("\n > writing the Cox-regression fashizzle to Excel...\n"))
    # One-row frame filled by COX.STAT(), appended to the running results.
    COX.results.TEMP <- data.frame(matrix(NA, ncol = 12, nrow = 0))
    COX.results.TEMP[1,] <- COX.STAT(cox, "AEDB.CEA", ep, TRAITS.PROTEIN.RANK[protein])
    COX.results <- rbind(COX.results, COX.results.TEMP)
  }
}

cat("- Edit the column names...\n")
# NOTE(review): the third column is called "CpG" but receives the protein
# name passed to COX.STAT() -- confirm before renaming.
colnames(COX.results) <- c("Dataset", "Outcome", "CpG",
                           "Beta", "s.e.m.",
                           "HR", "low95CI", "up95CI",
                           "Z-value", "P-value", "SampleSize", "N_events")

cat("- Correct the variable types...\n")
# Every column after the three identifiers is a statistic; local() keeps the
# helper vector out of the global environment.
COX.results <- local({
  stat_cols <- c("Beta", "s.e.m.", "HR", "low95CI", "up95CI",
                 "Z-value", "P-value", "SampleSize", "N_events")
  converted <- COX.results
  converted[stat_cols] <- lapply(converted[stat_cols], as.numeric)
  converted
})

AEDB.CEA.COX.results <- COX.results

# Save the data
library(openxlsx)
cat("- Writing results to Excel-file...\n")
head.style <- createStyle(textDecoration = "BOLD")
write.xlsx(AEDB.CEA.COX.results,
           file = paste0(OUT_loc, "/",Today,".AEDB.CEA.Cox.2G.MODEL1.30days.xlsx"),
           creator = "Sander W. van der Laan",
           sheetName = "Results", headerStyle = head.style,
           row.names = FALSE, col.names = TRUE, overwrite = TRUE)

# Removing intermediates
cat("- Removing intermediate files...\n")
# Clean-up is disabled for this model; the objects stay in the session.
#rm(TEMP.DF, protein, fit, cox, coxplot, COX.results, COX.results.TEMP, head.style, AEDB.CEA.COX.results)
MODEL 2
# MODEL 2, 30-day endpoints: Kaplan-Meier curves and multivariable-adjusted
# Cox regressions of every endpoint on every plaque protein split in two
# groups.
#
# Uses objects created earlier in the script: AEDB.CEA, times30, endpoints30,
# TRAITS.PROTEIN.RANK, COX_loc, OUT_loc, Today, COX.STAT() and the openxlsx
# functions createStyle()/write.xlsx().
# Writes one survival PDF and one Cox PDF per endpoint/protein pair to
# COX_loc and one Excel file with a result row per pair to OUT_loc, then
# removes its intermediates from the session.

# Set up a dataframe to receive results (12 columns, named after the loop)
COX.results <- data.frame(matrix(NA, ncol = 12, nrow = 0))

# Looping over each protein/endpoint/time combination.
# seq_along() rather than 1:length(): nothing runs when the vector is empty.
for (i in seq_along(times30)) {
  eptime <- times30[i]
  ep <- endpoints30[i]
  cat(paste0("* Analyzing the effect of plaque proteins on [",ep,"].\n"))
  cat(" - creating temporary SE for this work.\n")
  # Fresh copy per endpoint, because the protein columns are overwritten below.
  TEMP.DF <- as.data.frame(AEDB.CEA)
  cat(" - making a 'Surv' object and adding this to temporary dataframe.\n")
  TEMP.DF$event <- as.integer(TEMP.DF[,ep])
  #as.integer(TEMP.DF[,ep] == "Excluded")
  TEMP.DF$y <- Surv(time = TEMP.DF[,eptime], event = TEMP.DF$event)
  cat(" - making strata of each of the plaque proteins and start survival analysis.\n")

  for (protein in seq_along(TRAITS.PROTEIN.RANK)) {
    cat(paste0(" > processing [",TRAITS.PROTEIN.RANK[protein],"]; ",protein," out of ",length(TRAITS.PROTEIN.RANK)," proteins.\n"))
    # splitting into two groups (the protein column is replaced by its stratum)
    TEMP.DF[[ TRAITS.PROTEIN.RANK[protein] ]] <- cut2(TEMP.DF[,TRAITS.PROTEIN.RANK[protein]], g = 2)
    cat(paste0(" > cross tabulation of ",TRAITS.PROTEIN.RANK[protein],"-stratum.\n"))
    show(table(TEMP.DF[[ TRAITS.PROTEIN.RANK[protein] ]]))

    cat(paste0("\n > fitting the model for ",TRAITS.PROTEIN.RANK[protein],"-stratum.\n"))
    fit <- survfit(as.formula(paste0("y ~ ", TRAITS.PROTEIN.RANK[protein])), data = TEMP.DF)

    cat(paste0("\n > make a Kaplan-Meier-shizzle...\n"))
    # make Kaplan-Meier curve and save it
    # (for four groups, extend palette and linetype to four entries each)
    show(ggsurvplot(fit, data = TEMP.DF,
                    palette = c("#DB003F", "#1290D9"),
                    linetype = c(1,2),
                    ylim = c(0.75, 1),
                    # conf.int = FALSE, conf.int.fill = "#595A5C", conf.int.alpha = 0.1,
                    pval = FALSE, pval.method = FALSE, pval.size = 4,
                    risk.table = TRUE, risk.table.y.text = FALSE, tables.y.text.col = TRUE, fontsize = 4,
                    censor = FALSE,
                    legend = "right",
                    legend.title = paste0("",TRAITS.PROTEIN.RANK[protein],""),
                    legend.labs = c("low", "high"),
                    title = paste0("Risk of ",ep,""), xlab = "Time [days]", font.main = c(16, "bold", "black")))
    dev.copy2pdf(file = paste0(COX_loc,"/",
                               Today,".AEDB.CEA.survival.",ep,".2G.",
                               TRAITS.PROTEIN.RANK[protein],".30days.pdf"), width = 12, height = 10, onefile = FALSE)

    cat(paste0("\n > perform the Cox-regression fashizzle and plot it...\n"))
    ### Do Cox-regression and plot it
    ### MODEL 2 adjusted for age, sex, hypertension, diabetes, smoking,
    ### lipid-lowering drugs, antiplatelet drugs, eGFR, BMI, history of CVD,
    ### level of stenosis.
    # NOTE(review): LDL-C levels are sometimes listed for this model, but the
    # formula below has no LDL-C term -- confirm which is intended.
    # 'cox' estimates the effect of the protein stratum; 'coxplot' stratifies
    # on it instead so that survfit() yields one curve per stratum.
    cox <- coxph(Surv(TEMP.DF[,eptime], event) ~ TEMP.DF[[ TRAITS.PROTEIN.RANK[protein] ]]+Age + Gender + Hypertension.composite + DiabetesStatus + SmokerStatus + Med.Statin.LLD + Med.all.antiplatelet + GFR_MDRD + BMI + MedHx_CVD + stenose, data = TEMP.DF)
    coxplot <- coxph(Surv(TEMP.DF[,eptime], event) ~ strata(TEMP.DF[[ TRAITS.PROTEIN.RANK[protein] ]])+Age + Gender + Hypertension.composite + DiabetesStatus + SmokerStatus + Med.Statin.LLD + Med.all.antiplatelet + GFR_MDRD + BMI + MedHx_CVD + stenose, data = TEMP.DF)

    # Curve colours and line types mirror the legend below.
    # NOTE(review): the x-axis is limited to 0-3 although it is labelled in
    # days; confirm this matches the units of the times30 columns.
    plot(survfit(coxplot), main = paste0("Cox proportional hazard of [",ep,"] per [",eptime,"]."),
         ylim = c(0.75, 1), xlim = c(0,3), col = c("#DB003F", "#1290D9"),
         lty = c(1,2), lwd = 2,
         ylab = "Survival probability", xlab = "FU time [days]",
         mark.time = FALSE, axes = FALSE, bty = "n")
    legend("topright",
           c("low", "high"),
           title = paste0("",TRAITS.PROTEIN.RANK[protein],""),
           col = c("#DB003F", "#1290D9"),
           lty = c(1,2), lwd = 2,
           bty = "n")
    axis(side = 1, at = seq(0, 3, by = 1))
    axis(side = 2, at = seq(0, 1, by = 0.2))
    dev.copy2pdf(file = paste0(COX_loc,"/",
                               Today,".AEDB.CEA.Cox.",ep,".2G.",
                               TRAITS.PROTEIN.RANK[protein],".MODEL2.30days.pdf"), height = 12, width = 10, onefile = TRUE)

    show(summary(cox))

    cat(paste0("\n > writing the Cox-regression fashizzle to Excel...\n"))
    # One-row frame filled by COX.STAT(), appended to the running results.
    COX.results.TEMP <- data.frame(matrix(NA, ncol = 12, nrow = 0))
    COX.results.TEMP[1,] <- COX.STAT(cox, "AEDB.CEA", ep, TRAITS.PROTEIN.RANK[protein])
    COX.results <- rbind(COX.results, COX.results.TEMP)
  }
}

cat("- Edit the column names...\n")
# NOTE(review): the third column is called "CpG" but receives the protein
# name passed to COX.STAT() -- confirm before renaming.
colnames(COX.results) <- c("Dataset", "Outcome", "CpG",
                           "Beta", "s.e.m.",
                           "HR", "low95CI", "up95CI",
                           "Z-value", "P-value", "SampleSize", "N_events")

cat("- Correct the variable types...\n")
# Every column after the three identifiers is a statistic; local() keeps the
# helper vector out of the global environment.
COX.results <- local({
  stat_cols <- c("Beta", "s.e.m.", "HR", "low95CI", "up95CI",
                 "Z-value", "P-value", "SampleSize", "N_events")
  converted <- COX.results
  converted[stat_cols] <- lapply(converted[stat_cols], as.numeric)
  converted
})

AEDB.CEA.COX.results <- COX.results

# Save the data
cat("- Writing results to Excel-file...\n")
head.style <- createStyle(textDecoration = "BOLD")
write.xlsx(AEDB.CEA.COX.results,
           file = paste0(OUT_loc, "/",Today,".AEDB.CEA.Cox.2G.MODEL2.30days.xlsx"),
           creator = "Sander W. van der Laan",
           sheetName = "Results", headerStyle = head.style,
           row.names = FALSE, col.names = TRUE, overwrite = TRUE)

# Removing intermediates
cat("- Removing intermediate files...\n")
# head.style is part of this list, so a second rm(head.style) would only
# raise "object 'head.style' not found".
rm(TEMP.DF, protein, fit, cox, coxplot, COX.results, COX.results.TEMP, head.style, AEDB.CEA.COX.results)
MODEL 1
# MODEL 1, 90-day endpoints: Kaplan-Meier curves and age/sex-adjusted Cox
# regressions of every endpoint on every plaque protein split in two groups.
#
# Uses objects created earlier in the script: AEDB.CEA, times90, endpoints90,
# TRAITS.PROTEIN.RANK, COX_loc, OUT_loc, Today and COX.STAT().
# Writes one survival PDF and one Cox PDF per endpoint/protein pair to
# COX_loc and one Excel file with a result row per pair to OUT_loc.

# Set up a dataframe to receive results (12 columns, named after the loop)
COX.results <- data.frame(matrix(NA, ncol = 12, nrow = 0))

# Looping over each protein/endpoint/time combination.
# seq_along() rather than 1:length(): nothing runs when the vector is empty.
for (i in seq_along(times90)) {
  eptime <- times90[i]
  ep <- endpoints90[i]
  cat(paste0("* Analyzing the effect of plaque proteins on [",ep,"].\n"))
  cat(" - creating temporary SE for this work.\n")
  # Fresh copy per endpoint, because the protein columns are overwritten below.
  TEMP.DF <- as.data.frame(AEDB.CEA)
  cat(" - making a 'Surv' object and adding this to temporary dataframe.\n")
  TEMP.DF$event <- as.integer(TEMP.DF[,ep])
  TEMP.DF$y <- Surv(time = TEMP.DF[,eptime], event = TEMP.DF$event)
  cat(" - making strata of each of the plaque proteins and start survival analysis.\n")

  for (protein in seq_along(TRAITS.PROTEIN.RANK)) {
    cat(paste0(" > processing [",TRAITS.PROTEIN.RANK[protein],"]; ",protein," out of ",length(TRAITS.PROTEIN.RANK)," proteins.\n"))
    # splitting into two groups (the protein column is replaced by its stratum)
    TEMP.DF[[ TRAITS.PROTEIN.RANK[protein] ]] <- cut2(TEMP.DF[,TRAITS.PROTEIN.RANK[protein]], g = 2)
    cat(paste0(" > cross tabulation of ",TRAITS.PROTEIN.RANK[protein],"-stratum.\n"))
    show(table(TEMP.DF[[ TRAITS.PROTEIN.RANK[protein] ]]))

    cat(paste0("\n > fitting the model for ",TRAITS.PROTEIN.RANK[protein],"-stratum.\n"))
    fit <- survfit(as.formula(paste0("y ~ ", TRAITS.PROTEIN.RANK[protein])), data = TEMP.DF)

    cat(paste0("\n > make a Kaplan-Meier-shizzle...\n"))
    # make Kaplan-Meier curve and save it
    # (for four groups, extend palette and linetype to four entries each)
    show(ggsurvplot(fit, data = TEMP.DF,
                    palette = c("#DB003F", "#1290D9"),
                    linetype = c(1,2),
                    ylim = c(0.75, 1),
                    # conf.int = FALSE, conf.int.fill = "#595A5C", conf.int.alpha = 0.1,
                    pval = FALSE, pval.method = FALSE, pval.size = 4,
                    risk.table = TRUE, risk.table.y.text = FALSE, tables.y.text.col = TRUE, fontsize = 4,
                    censor = FALSE,
                    legend = "right",
                    legend.title = paste0("",TRAITS.PROTEIN.RANK[protein],""),
                    legend.labs = c("low", "high"),
                    title = paste0("Risk of ",ep,""), xlab = "Time [days]", font.main = c(16, "bold", "black")))
    dev.copy2pdf(file = paste0(COX_loc,"/",
                               Today,".AEDB.CEA.survival.",ep,".2G.",
                               TRAITS.PROTEIN.RANK[protein],".90days.pdf"), width = 12, height = 10, onefile = FALSE)

    cat(paste0("\n > perform the Cox-regression fashizzle and plot it...\n"))
    ### Do Cox-regression and plot it
    ### MODEL 1 (Simple model)
    # 'cox' estimates the effect of the protein stratum; 'coxplot' stratifies
    # on it instead so that survfit() yields one curve per stratum.
    cox <- coxph(Surv(TEMP.DF[,eptime], event) ~ TEMP.DF[[ TRAITS.PROTEIN.RANK[protein] ]]+Age+Gender, data = TEMP.DF)
    coxplot <- coxph(Surv(TEMP.DF[,eptime], event) ~ strata(TEMP.DF[[ TRAITS.PROTEIN.RANK[protein] ]])+Age+Gender, data = TEMP.DF)

    # The curve colours must be the same vector the legend uses. A longer
    # vector starting with grey would draw "low" in grey and "high" in red
    # while the legend shows red and blue.
    # NOTE(review): the x-axis is limited to 0-3 although it is labelled in
    # days; confirm this matches the units of the times90 columns.
    plot(survfit(coxplot), main = paste0("Cox proportional hazard of [",ep,"] per [",eptime,"]."),
         ylim = c(0.75, 1), xlim = c(0,3), col = c("#DB003F", "#1290D9"),
         lty = c(1,2), lwd = 2,
         ylab = "Survival probability", xlab = "FU time [days]",
         mark.time = FALSE, axes = FALSE, bty = "n")
    legend("topright",
           c("low", "high"),
           title = paste0("",TRAITS.PROTEIN.RANK[protein],""),
           col = c("#DB003F", "#1290D9"),
           lty = c(1,2), lwd = 2,
           bty = "n")
    axis(side = 1, at = seq(0, 3, by = 1))
    axis(side = 2, at = seq(0, 1, by = 0.2))
    dev.copy2pdf(file = paste0(COX_loc,"/",
                               Today,".AEDB.CEA.Cox.",ep,".2G.",
                               TRAITS.PROTEIN.RANK[protein],".MODEL1.90days.pdf"), height = 12, width = 10, onefile = TRUE)

    show(summary(cox))

    cat(paste0("\n > writing the Cox-regression fashizzle to Excel...\n"))
    # One-row frame filled by COX.STAT(), appended to the running results.
    COX.results.TEMP <- data.frame(matrix(NA, ncol = 12, nrow = 0))
    COX.results.TEMP[1,] <- COX.STAT(cox, "AEDB.CEA", ep, TRAITS.PROTEIN.RANK[protein])
    COX.results <- rbind(COX.results, COX.results.TEMP)
  }
}

cat("- Edit the column names...\n")
# NOTE(review): the third column is called "CpG" but receives the protein
# name passed to COX.STAT() -- confirm before renaming.
colnames(COX.results) <- c("Dataset", "Outcome", "CpG",
                           "Beta", "s.e.m.",
                           "HR", "low95CI", "up95CI",
                           "Z-value", "P-value", "SampleSize", "N_events")

cat("- Correct the variable types...\n")
# Every column after the three identifiers is a statistic; local() keeps the
# helper vector out of the global environment.
COX.results <- local({
  stat_cols <- c("Beta", "s.e.m.", "HR", "low95CI", "up95CI",
                 "Z-value", "P-value", "SampleSize", "N_events")
  converted <- COX.results
  converted[stat_cols] <- lapply(converted[stat_cols], as.numeric)
  converted
})

AEDB.CEA.COX.results <- COX.results

# Save the data
library(openxlsx)
cat("- Writing results to Excel-file...\n")
head.style <- createStyle(textDecoration = "BOLD")
write.xlsx(AEDB.CEA.COX.results,
           file = paste0(OUT_loc, "/",Today,".AEDB.CEA.Cox.2G.MODEL1.90days.xlsx"),
           creator = "Sander W. van der Laan",
           sheetName = "Results", headerStyle = head.style,
           row.names = FALSE, col.names = TRUE, overwrite = TRUE)

# Removing intermediates
cat("- Removing intermediate files...\n")
# Clean-up is disabled for this model; the objects stay in the session.
#rm(TEMP.DF, protein, fit, cox, coxplot, COX.results, COX.results.TEMP, head.style, AEDB.CEA.COX.results)
MODEL 2
# MODEL 2: for every 90-day endpoint and every plaque protein, split the
# protein into two groups, draw a Kaplan-Meier plot, fit an adjusted Cox
# model, plot it, and collect the statistics returned by COX.STAT().

# Set up a dataframe to receive results
COX.results <- data.frame(matrix(NA, ncol = 12, nrow = 0))

# Looping over each protein/endpoint/time combination.
# seq_along() is used instead of 1:length() so that an empty vector results in
# zero iterations rather than the indices 1 and 0.
for (i in seq_along(times90)) {
  eptime <- times90[i]   # name of the follow-up time column
  ep <- endpoints90[i]   # name of the matching event column
  cat(paste0("* Analyzing the effect of plaque proteins on [",ep,"].\n"))
  cat(" - creating temporary SE for this work.\n")
  TEMP.DF <- as.data.frame(AEDB.CEA)
  cat(" - making a 'Surv' object and adding this to temporary dataframe.\n")
  TEMP.DF$event <- as.integer(TEMP.DF[,ep])
  #as.integer(TEMP.DF[,ep] == "Excluded")
  TEMP.DF$y <- Surv(time = TEMP.DF[,eptime], event = TEMP.DF$event)
  cat(" - making strata of each of the plaque proteins and start survival analysis.\n")

  for (protein in seq_along(TRAITS.PROTEIN.RANK)) {
    cat(paste0("   > processing [",TRAITS.PROTEIN.RANK[protein],"]; ",protein," out of ",length(TRAITS.PROTEIN.RANK)," proteins.\n"))

    # splitting into two groups; the protein column in TEMP.DF is overwritten
    # by its two-group version (TEMP.DF is rebuilt for every endpoint)
    TEMP.DF[[ TRAITS.PROTEIN.RANK[protein] ]] <- cut2(TEMP.DF[,TRAITS.PROTEIN.RANK[protein]], g = 2)
    cat(paste0("   > cross tabulation of ",TRAITS.PROTEIN.RANK[protein],"-stratum.\n"))
    show(table(TEMP.DF[[ TRAITS.PROTEIN.RANK[protein] ]]))

    cat(paste0("\n   > fitting the model for ",TRAITS.PROTEIN.RANK[protein],"-stratum.\n"))
    fit <- survfit(as.formula(paste0("y ~ ", TRAITS.PROTEIN.RANK[protein])), data = TEMP.DF)

    cat(paste0("\n   > make a Kaplan-Meier-shizzle...\n"))
    # make Kaplan-Meier curve and save it
    show(ggsurvplot(fit, data = TEMP.DF,
                    palette = c("#DB003F", "#1290D9"),
                    # palete = c("F59D10", "#DB003F", "#49A01D", "#1290D9"),
                    linetype = c(1,2),
                    ylim = c(0.75, 1),
                    # linetype = c(1,2,3,4),
                    # conf.int = FALSE, conf.int.fill = "#595A5C", conf.int.alpha = 0.1,
                    pval = FALSE, pval.method = FALSE, pval.size = 4,
                    risk.table = TRUE, risk.table.y.text = FALSE, tables.y.text.col = TRUE, fontsize = 4,
                    censor = FALSE,
                    legend = "right",
                    legend.title = paste0("",TRAITS.PROTEIN.RANK[protein],""),
                    legend.labs = c("low", "high"),
                    title = paste0("Risk of ",ep,""), xlab = "Time [days]", font.main = c(16, "bold", "black")))
    dev.copy2pdf(file = paste0(COX_loc,"/",
                               Today,".AEDB.CEA.survival.",ep,".2G.",
                               TRAITS.PROTEIN.RANK[protein],".90days.pdf"), width = 12, height = 10, onefile = FALSE)

    cat(paste0("\n   > perform the Cox-regression fashizzle and plot it...\n"))
    ### Do Cox-regression and plot it
    ### MODEL 2 adjusted for age, sex, hypertension, diabetes, smoking, LDL-C levels, lipid-lowering drugs, antiplatelet drugs, eGFR, BMI, history of CVD, level of stenosis
    ### NOTE(review): the description above lists LDL-C levels, but neither
    ### formula below contains an LDL-C term - confirm which one is intended.
    cox <- coxph(Surv(TEMP.DF[,eptime], event) ~ TEMP.DF[[ TRAITS.PROTEIN.RANK[protein] ]]+Age + Gender + Hypertension.composite + DiabetesStatus + SmokerStatus + Med.Statin.LLD + Med.all.antiplatelet + GFR_MDRD + BMI + MedHx_CVD + stenose, data = TEMP.DF)
    # same covariates, but with the protein groups as strata so that
    # survfit() yields one curve per group
    coxplot <- coxph(Surv(TEMP.DF[,eptime], event) ~ strata(TEMP.DF[[ TRAITS.PROTEIN.RANK[protein] ]])+Age + Gender + Hypertension.composite + DiabetesStatus + SmokerStatus + Med.Statin.LLD + Med.all.antiplatelet + GFR_MDRD + BMI + MedHx_CVD + stenose, data = TEMP.DF)

    plot(survfit(coxplot), main = paste0("Cox proportional hazard of [",ep,"] per [",eptime,"]."),
         ylim = c(0.75, 1), xlim = c(0,3), col = c("#DB003F", "#1290D9"),
         # ylim = c(0, 1), xlim = c(0,3), col = c("#DB003F", "#1290D9"),
         lty = c(1,2), lwd = 2,
         ylab = "Suvival probability", xlab = "FU time [days]",
         mark.time = FALSE, axes = FALSE, bty = "n")
    legend("topright",
           c("low", "high"),
           title = paste0("",TRAITS.PROTEIN.RANK[protein],""),
           col = c("#DB003F", "#1290D9"),
           lty = c(1,2), lwd = 2,
           bty = "n")
    axis(side = 1, at = seq(0, 3, by = 1))
    axis(side = 2, at = seq(0, 1, by = 0.2))
    dev.copy2pdf(file = paste0(COX_loc,"/",
                               Today,".AEDB.CEA.Cox.",ep,".2G.",
                               # Today,".AEDB.CEA.Cox.",ep,".4G.",
                               TRAITS.PROTEIN.RANK[protein],".MODEL2.90days.pdf"), height = 12, width = 10, onefile = TRUE)

    show(summary(cox))

    cat(paste0("\n   > writing the Cox-regression fashizzle to Excel...\n"))
    # one-row frame with the 12 values from COX.STAT(), appended to the results
    COX.results.TEMP <- data.frame(matrix(NA, ncol = 12, nrow = 0))
    COX.results.TEMP[1,] <- COX.STAT(cox, "AEDB.CEA", ep, TRAITS.PROTEIN.RANK[protein])
    COX.results <- rbind(COX.results, COX.results.TEMP)
  }
}
# ---- MODEL 2: label, type-convert and export the Cox regression results ----
cat("- Edit the column names...\n")
colnames(COX.results) <- c(
  "Dataset", "Outcome", "CpG",
  "Beta", "s.e.m.",
  "HR", "low95CI", "up95CI",
  "Z-value", "P-value", "SampleSize", "N_events"
)

cat("- Correct the variable types...\n")
# Columns 4 to 12 (Beta ... N_events) are the statistics; the first three
# columns (Dataset, Outcome, CpG) are identifiers and stay as they are.
COX.results[4:12] <- lapply(COX.results[4:12], as.numeric)

AEDB.CEA.COX.results <- COX.results

# Save the data
cat("- Writing results to Excel-file...\n")
head.style <- createStyle(textDecoration = "BOLD")
write.xlsx(
  AEDB.CEA.COX.results,
  file = paste0(OUT_loc, "/", Today, ".AEDB.CEA.Cox.2G.MODEL2.90days.xlsx"),
  creator = "Sander W. van der Laan",
  sheetName = "Results",
  headerStyle = head.style,
  row.names = FALSE,
  col.names = TRUE,
  overwrite = TRUE
)

# Removing intermediates
cat("- Removing intermediate files...\n")
# head.style is part of this call already; a second rm(head.style) would only
# raise an "object 'head.style' not found" warning, so it has been dropped.
rm(TEMP.DF, protein, fit, cox, coxplot, COX.results, COX.results.TEMP, head.style, AEDB.CEA.COX.results)
We correlated serum and plaque levels of the biomarkers.
# Installation of ggcorrplot()
# --------------------------------
# require() attaches devtools when it is available and returns FALSE when it
# is not; in the latter case devtools is installed first.
if (!require(devtools)) {
  install.packages("devtools")
}
# Install ggcorrplot from its GitHub repository.
devtools::install_github("kassambara/ggcorrplot")
Skipping install of 'ggcorrplot' from a github remote, the SHA1 (c46b4cce) has not changed since last install.
Use `force = TRUE` to force installation
library(ggcorrplot)
# Creating matrix - inverse-rank transformation
# --------------------------------
# Earlier variants of the selection, kept for reference:
# AEDB.CEA.temp <- subset(AEDB.CEA,
#                         select = c("IL6_rank", "MCP1_rank", "IL6_pg_ug_2015_rank", "MCP1_pg_ug_2015_rank", "IL6R_pg_ug_2015_rank",
#                                    TRAITS.BIN, TRAITS.CON.RANK)
#                                    )
# AEDB.CEA.temp <- subset(AEDB.CEA,
#                         select = c("MCP1_rank", "MCP1_pg_ug_2015_rank",
#                                    TRAITS.BIN, TRAITS.CON.RANK)
#                                    )

# Keep plaque MCP1 plus the binary and continuous plaque traits.
AEDB.CEA.temp <- subset(
  AEDB.CEA,
  select = c("MCP1_pg_ug_2015_rank", TRAITS.BIN, TRAITS.CON.RANK)
)

# Turn the four categorical plaque traits into numeric codes so the whole
# selection can be converted to a numeric matrix.
AEDB.CEA.temp[c("CalcificationPlaque", "CollagenPlaque", "Fat10Perc", "IPH")] <- lapply(
  AEDB.CEA.temp[c("CalcificationPlaque", "CollagenPlaque", "Fat10Perc", "IPH")],
  as.numeric
)

str(AEDB.CEA.temp)
tibble [2,421 × 8] (S3: tbl_df/tbl/data.frame)
$ MCP1_pg_ug_2015_rank: num [1:2421] NA 0.937 2.158 1.209 1.996 ...
$ CalcificationPlaque : num [1:2421] 1 1 1 1 1 2 2 1 2 1 ...
$ CollagenPlaque : num [1:2421] 1 2 2 2 2 1 NA 2 2 2 ...
$ Fat10Perc : num [1:2421] 2 2 2 2 2 2 2 2 2 2 ...
$ IPH : num [1:2421] 2 2 2 2 1 2 2 2 2 2 ...
$ Macrophages_rank : num [1:2421] 1.388 1.12 1.365 0.721 0.396 ...
$ SMC_rank : num [1:2421] 1.67023 1.13089 0.00295 1.42623 1.2689 ...
$ VesselDensity_rank : num [1:2421] -0.529 -0.977 -0.774 0.716 1.099 ...
# Convert the selection to a matrix and drop the temporary data frame.
AEDB.CEA.matrix.RANK <- as.matrix(AEDB.CEA.temp)
rm(AEDB.CEA.temp)
# Spearman correlation matrix, rounded to three decimals.
corr_biomarkers.rank <- round(cor(AEDB.CEA.matrix.RANK,
use = "pairwise.complete.obs", #the correlation or covariance between each pair of variables is computed using all complete pairs of observations on those variables
method = "spearman"), 3)
# corr_biomarkers.rank
# Matrix of p-values for the same pairs of variables.
# NOTE(review): the extra arguments are presumably forwarded to cor.test(),
# which has no 'use' argument - confirm that 'use' has any effect here.
corr_biomarkers_p.rank <- ggcorrplot::cor_pmat(AEDB.CEA.matrix.RANK, use = "pairwise.complete.obs", method = "spearman")
Cannot compute exact p-value with tiesCannot compute exact p-value with tiesCannot compute exact p-value with tiesCannot compute exact p-value with tiesCannot compute exact p-value with tiesCannot compute exact p-value with tiesCannot compute exact p-value with tiesCannot compute exact p-value with tiesCannot compute exact p-value with tiesCannot compute exact p-value with tiesCannot compute exact p-value with tiesCannot compute exact p-value with tiesCannot compute exact p-value with tiesCannot compute exact p-value with tiesCannot compute exact p-value with tiesCannot compute exact p-value with tiesCannot compute exact p-value with tiesCannot compute exact p-value with tiesCannot compute exact p-value with tiesCannot compute exact p-value with tiesCannot compute exact p-value with tiesCannot compute exact p-value with tiesCannot compute exact p-value with tiesCannot compute exact p-value with tiesCannot compute exact p-value with tiesCannot compute exact p-value with tiesCannot compute exact p-value with tiesCannot compute exact p-value with ties
# Add correlation coefficients
# --------------------------------
# argument lab = TRUE
# Heat map of the lower triangle of the Spearman correlation matrix.
# lab = FALSE is used here, so the coefficients are not printed in the tiles;
# the p-value matrix is not used because the p.mat line is commented out.
ggcorrplot(corr_biomarkers.rank,
method = "square",
type = "lower",
title = "Cross biomarker correlations",
show.legend = TRUE, legend.title = bquote("Spearman's"~italic(rho)),
ggtheme = ggplot2::theme_minimal, outline.color = "#FFFFFF",
show.diag = TRUE,
hc.order = FALSE,
lab = FALSE,
digits = 3,
# p.mat = corr_biomarkers_p.rank, sig.level = 0.05,
colors = c("#1290D9", "#FFFFFF", "#E55738"))
# flattenCorrMatrix
# --------------------------------
# Turn the upper triangle of a correlation matrix (diagonal excluded) into a
# long data frame with one row per pair of variables.
#
# cormat : matrix of the correlation coefficients
# pmat : matrix of the correlation p-values (same dimensions as cormat)
#
# Returns a data.frame with the columns biomarker_row, biomarker_column,
# spearman_cor and pval.
flattenCorrMatrix <- function(cormat, pmat) {
  # Both matrices are indexed with the same mask, so they must line up.
  stopifnot(identical(dim(cormat), dim(pmat)))
  ut <- upper.tri(cormat)
  # Label the column index with the column names; fall back to the row names
  # (the former behaviour) when the matrix has no column names.
  column_labels <- colnames(cormat)
  if (is.null(column_labels)) {
    column_labels <- rownames(cormat)
  }
  data.frame(
    biomarker_row = rownames(cormat)[row(cormat)[ut]],
    biomarker_column = column_labels[col(cormat)[ut]],
    spearman_cor = cormat[ut],
    pval = pmat[ut]
  )
}
# Long-format table of all pairwise correlations and p-values, shown as an
# interactive table.
corr_biomarkers.rank.df <- as.data.table(flattenCorrMatrix(corr_biomarkers.rank, corr_biomarkers_p.rank))
DT::datatable(corr_biomarkers.rank.df)
# chart of a correlation matrix
# --------------------------------
# Alternative solution https://www.r-graph-gallery.com/199-correlation-matrix-with-ggally.html
install.packages.auto("PerformanceAnalytics")
# Pairs plot of a data matrix: scatter plots with a smoother below the
# diagonal, the correlation coefficient with significance stars above it and,
# optionally, a histogram with density line and rug on the diagonal.
# (Appears to be adapted from PerformanceAnalytics::chart.Correlation().)
#
# R         : data, converted to a matrix with checkData()
# histogram : draw the histogram panels on the diagonal?
# method    : correlation method passed to cor() and cor.test();
#             defaults to "pearson" when not supplied
# ...       : further arguments passed on to pairs(), e.g. pch
chart.Correlation.new <- function (R, histogram = TRUE, method = c("pearson", "kendall",
                                                                   "spearman"), ...)
{
  x <- checkData(R, method = "matrix")
  cormeth <- match.arg(method)

  # Upper panels: correlation coefficient, scaled by its absolute size, plus
  # significance symbols derived from the cor.test() p-value.
  panel.cor <- function(x, y, digits = 2, prefix = "", use = "pairwise.complete.obs",
                        method = cormeth, cex.cor, ...) {
    usr <- par("usr")
    # The limits have to be passed by name; an unnamed numeric vector is not
    # a valid par() argument and would leave them unrestored.
    on.exit(par(usr = usr))
    par(usr = c(0, 1, 0, 1))
    r <- cor(x, y, use = use, method = method)
    txt <- format(c(r, 0.123456789), digits = digits)[1]
    txt <- paste(prefix, txt, sep = "")
    # Use the caller's cex.cor when given; previously 'cex' was only defined
    # when cex.cor was missing, so supplying cex.cor failed.
    cex <- if (missing(cex.cor)) 0.8/strwidth(txt) else cex.cor
    test <- cor.test(as.numeric(x), as.numeric(y), method = method)
    Signif <- symnum(test$p.value, corr = FALSE, na = FALSE,
                     cutpoints = c(0, 0.001, 0.01, 0.05, 0.1, 1), symbols = c("***",
                                                                              "**", "*", ".", " "))
    text(0.5, 0.5, txt, cex = cex * (abs(r) + 0.3)/1.3)
    text(0.8, 0.8, Signif, cex = cex, col = 2)
  }

  # Diagonal panels: histogram on the density scale, kernel density line, rug.
  hist.panel <- function(x, ...) {
    par(new = TRUE)
    hist(x, col = "#1290D9", probability = TRUE, axes = FALSE,
         # hist(x, col = "light gray", probability = TRUE, axes = FALSE,
         main = "", breaks = "FD")
    lines(density(x, na.rm = TRUE), col = "#E55738", lwd = 1)
    rug(x)
  }

  if (histogram) {
    pairs(x, gap = 0, lower.panel = panel.smooth, upper.panel = panel.cor,
          diag.panel = hist.panel, ...)
  } else {
    pairs(x, gap = 0, lower.panel = panel.smooth, upper.panel = panel.cor, ...)
  }
}
# Draw the chart for the plaque matrix with Spearman correlations; pch = 3 is
# passed on to pairs().
chart.Correlation.new(AEDB.CEA.matrix.RANK, method = "spearman", histogram = TRUE, pch = 3)
# alternative chart of a correlation matrix
# --------------------------------
# Alternative solution https://www.r-graph-gallery.com/199-correlation-matrix-with-ggally.html
install.packages.auto("GGally")

# Quick display of two capabilities of GGally, to assess the distribution and correlation of variables
library(GGally)

# From the help page:
# ggpairs(AEDB.CEA,
#         columns = c("MCP1_rank", "MCP1_pg_ug_2015_rank", TRAITS.BIN, TRAITS.CON.RANK),
#         columnLabels = c("MCP1 (serum)", "MCP1",
#                          "Calcification", "Collagen", "Fat 10%", "IPH", "Macrophages", "SMC", "Vessel density"),
#         method = c("spearman"),
#         # ggplot2::aes(colour = Gender),
#         progress = FALSE)

# ggpairs() has no 'method' argument: passing method = "spearman" directly is
# ignored (with a message) and Pearson correlations are shown. The method is
# therefore handed to the correlation panel itself through wrap().
ggpairs(AEDB.CEA,
        columns = c("MCP1_pg_ug_2015_rank", TRAITS.BIN, TRAITS.CON.RANK),
        columnLabels = c("MCP1",
                         "Calcification", "Collagen", "Fat 10%", "IPH", "Macrophages", "SMC", "Vessel density"),
        upper = list(continuous = GGally::wrap("cor", method = "spearman")),
        # ggplot2::aes(colour = Gender),
        progress = FALSE)
Extra arguments: 'method' are being ignored. If these are meant to be aesthetics, submit them using the 'mapping' variable within ggpairs with ggplot2::aes or ggplot2::aes_string.
Finally, we explored in a sub-sample, where circulating MCP-1 levels are available, the following:
NOT AVAILABLE YET
# Installation of ggcorrplot()
# --------------------------------
# require() attaches devtools when it is available and returns FALSE when it
# is not; in the latter case devtools is installed first.
if (!require(devtools)) {
  install.packages("devtools")
}
# Install ggcorrplot from its GitHub repository.
devtools::install_github("kassambara/ggcorrplot")
library(ggcorrplot)
# Creating matrix - inverse-rank transformation
# --------------------------------
# Serum MCP1, the plaque traits, and the symptom and endpoint variables.
AEDB.CEA.temp <- subset(
  AEDB.CEA,
  select = c("MCP1_rank",
             TRAITS.BIN, TRAITS.CON.RANK,
             "Symptoms.5G", "AsymptSympt", "EP_major", "EP_composite")
)

# Turn the categorical columns into numeric codes so the whole selection can
# be converted to a numeric matrix.
AEDB.CEA.temp[c("CalcificationPlaque", "CollagenPlaque", "Fat10Perc", "IPH",
                "Symptoms.5G", "AsymptSympt", "EP_major", "EP_composite")] <- lapply(
  AEDB.CEA.temp[c("CalcificationPlaque", "CollagenPlaque", "Fat10Perc", "IPH",
                  "Symptoms.5G", "AsymptSympt", "EP_major", "EP_composite")],
  as.numeric
)
# str(AEDB.CEA.temp)

AEDB.CEA.matrix.serum.RANK <- as.matrix(AEDB.CEA.temp)
rm(AEDB.CEA.temp)

# Spearman correlations on all pairwise complete observations, rounded to
# three decimals.
corr_biomarkers_serum.rank <- round(
  cor(AEDB.CEA.matrix.serum.RANK,
      use = "pairwise.complete.obs",
      method = "spearman"),
  3
)
# corr_biomarkers.rank

# Matrix of p-values for the same pairs of variables.
corr_biomarkers_serum_p.rank <- ggcorrplot::cor_pmat(
  AEDB.CEA.matrix.serum.RANK,
  use = "pairwise.complete.obs",
  method = "spearman"
)
# Add correlation coefficients
# --------------------------------
# argument lab = TRUE
# Heat map of the lower triangle of the Spearman correlation matrix.
# lab = FALSE is used here, so the coefficients are not printed in the tiles;
# the p-value matrix is not used because the p.mat line is commented out.
ggcorrplot(corr_biomarkers_serum.rank,
method = "square",
type = "lower",
title = "Cross biomarker correlations",
show.legend = TRUE, legend.title = bquote("Spearman's"~italic(rho)),
ggtheme = ggplot2::theme_minimal, outline.color = "#FFFFFF",
show.diag = TRUE,
hc.order = FALSE,
lab = FALSE,
digits = 3,
# p.mat = corr_biomarkers_serum_p.rank, sig.level = 0.05,
colors = c("#1290D9", "#FFFFFF", "#E55738"))
# flattenCorrMatrix
# --------------------------------
# Turn the upper triangle of a correlation matrix (diagonal excluded) into a
# long data frame with one row per pair of variables.
#
# cormat : matrix of the correlation coefficients
# pmat : matrix of the correlation p-values (same dimensions as cormat)
#
# Returns a data.frame with the columns biomarker_row, biomarker_column,
# spearman_cor and pval.
flattenCorrMatrix <- function(cormat, pmat) {
  # Both matrices are indexed with the same mask, so they must line up.
  stopifnot(identical(dim(cormat), dim(pmat)))
  ut <- upper.tri(cormat)
  # Label the column index with the column names; fall back to the row names
  # (the former behaviour) when the matrix has no column names.
  column_labels <- colnames(cormat)
  if (is.null(column_labels)) {
    column_labels <- rownames(cormat)
  }
  data.frame(
    biomarker_row = rownames(cormat)[row(cormat)[ut]],
    biomarker_column = column_labels[col(cormat)[ut]],
    spearman_cor = cormat[ut],
    pval = pmat[ut]
  )
}
# Long-format table of all pairwise correlations and p-values, shown as an
# interactive table.
corr_biomarkers_serum.rank.df <- as.data.table(flattenCorrMatrix(corr_biomarkers_serum.rank, corr_biomarkers_serum_p.rank))
DT::datatable(corr_biomarkers_serum.rank.df)
# chart of a correlation matrix
# --------------------------------
# Alternative solution https://www.r-graph-gallery.com/199-correlation-matrix-with-ggally.html
install.packages.auto("PerformanceAnalytics")
# Pairs plot of a data matrix: scatter plots with a smoother below the
# diagonal, the correlation coefficient with significance stars above it and,
# optionally, a histogram with density line and rug on the diagonal.
# (Appears to be adapted from PerformanceAnalytics::chart.Correlation().)
#
# R         : data, converted to a matrix with checkData()
# histogram : draw the histogram panels on the diagonal?
# method    : correlation method passed to cor() and cor.test();
#             defaults to "pearson" when not supplied
# ...       : further arguments passed on to pairs(), e.g. pch
chart.Correlation.new <- function (R, histogram = TRUE, method = c("pearson", "kendall",
                                                                   "spearman"), ...)
{
  x <- checkData(R, method = "matrix")
  cormeth <- match.arg(method)

  # Upper panels: correlation coefficient, scaled by its absolute size, plus
  # significance symbols derived from the cor.test() p-value.
  panel.cor <- function(x, y, digits = 2, prefix = "", use = "pairwise.complete.obs",
                        method = cormeth, cex.cor, ...) {
    usr <- par("usr")
    # The limits have to be passed by name; an unnamed numeric vector is not
    # a valid par() argument and would leave them unrestored.
    on.exit(par(usr = usr))
    par(usr = c(0, 1, 0, 1))
    r <- cor(x, y, use = use, method = method)
    txt <- format(c(r, 0.123456789), digits = digits)[1]
    txt <- paste(prefix, txt, sep = "")
    # Use the caller's cex.cor when given; previously 'cex' was only defined
    # when cex.cor was missing, so supplying cex.cor failed.
    cex <- if (missing(cex.cor)) 0.8/strwidth(txt) else cex.cor
    test <- cor.test(as.numeric(x), as.numeric(y), method = method)
    Signif <- symnum(test$p.value, corr = FALSE, na = FALSE,
                     cutpoints = c(0, 0.001, 0.01, 0.05, 0.1, 1), symbols = c("***",
                                                                              "**", "*", ".", " "))
    text(0.5, 0.5, txt, cex = cex * (abs(r) + 0.3)/1.3)
    text(0.8, 0.8, Signif, cex = cex, col = 2)
  }

  # Diagonal panels: histogram on the density scale, kernel density line, rug.
  hist.panel <- function(x, ...) {
    par(new = TRUE)
    hist(x, col = "#1290D9", probability = TRUE, axes = FALSE,
         # hist(x, col = "light gray", probability = TRUE, axes = FALSE,
         main = "", breaks = "FD")
    lines(density(x, na.rm = TRUE), col = "#E55738", lwd = 1)
    rug(x)
  }

  if (histogram) {
    pairs(x, gap = 0, lower.panel = panel.smooth, upper.panel = panel.cor,
          diag.panel = hist.panel, ...)
  } else {
    pairs(x, gap = 0, lower.panel = panel.smooth, upper.panel = panel.cor, ...)
  }
}
# Draw the chart for the serum matrix with Spearman correlations; pch = 3 is
# passed on to pairs().
chart.Correlation.new(AEDB.CEA.matrix.serum.RANK, method = "spearman", histogram = TRUE, pch = 3)
# alternative chart of a correlation matrix
# --------------------------------
# Alternative solution https://www.r-graph-gallery.com/199-correlation-matrix-with-ggally.html
install.packages.auto("GGally")

# Quick display of two capabilities of GGally, to assess the distribution and correlation of variables
library(GGally)

# From the help page:
# ggpairs() has no 'method' argument: passing method = "spearman" directly is
# ignored (with a message) and Pearson correlations are shown. The method is
# therefore handed to the correlation panel itself through wrap().
ggpairs(AEDB.CEA,
        columns = c("MCP1_rank", TRAITS.BIN, TRAITS.CON.RANK, "Symptoms.5G", "AsymptSympt", "EP_major", "EP_composite"),
        columnLabels = c("MCP1 (serum)",
                         "Calcification", "Collagen", "Fat 10%", "IPH", "Macrophages", "SMC", "Vessel density",
                         "Symptoms", "Symptoms (grouped)", "MACE", "Composite"),
        upper = list(continuous = GGally::wrap("cor", method = "spearman")),
        # ggplot2::aes(colour = Gender),
        progress = FALSE)
We want to create per-age-group figures.
library(dplyr)
# Age groups. case_when() takes the first matching condition, so each upper
# bound only needs "<": this gives the same groups for whole-year ages and no
# longer leaves fractional ages (e.g. 64.5) unclassified. A missing Age
# matches no condition and stays NA.
AEDB.CEA <- AEDB.CEA %>%
  mutate(AgeGroup = factor(case_when(Age < 55 ~ "<55",
                                     Age < 65 ~ "55-64",
                                     Age < 75 ~ "65-74",
                                     Age < 85 ~ "75-84",
                                     Age >= 85 ~ "85+")))

# Age group per sex, e.g. "55-64 females". Building the label from AgeGroup
# keeps both variables consistent and removes the stray trailing space of the
# former "55-64 females " level. Rows with a missing age group or a Gender
# other than "male"/"female" stay NA.
AEDB.CEA <- AEDB.CEA %>%
  mutate(AgeGroupSex = factor(case_when(
    !is.na(AgeGroup) & Gender == "male" ~ paste0(AgeGroup, " males"),
    !is.na(AgeGroup) & Gender == "female" ~ paste0(AgeGroup, " females"))))

table(AEDB.CEA$AgeGroup, AEDB.CEA$Gender)
female male
<55 45 98
55-64 193 410
65-74 264 687
75-84 202 438
85+ 34 50
# Number of patients per combined age/sex group.
table(AEDB.CEA$AgeGroupSex)
<55 females <55 males 55-64 females 55-64 males 65-74 females 65-74 males 75-84 females 75-84 males 85+ females
45 98 193 410 264 687 202 438 34
85+ males
50
Now we can draw some graphs of serum/plaque MCP1 levels per sex and age group.
# ?ggpubr::ggboxplot()

# Plaque MCP1 (inverse-rank transformed) by gender.
# Global test (not run):
# compare_means(MCP1_pg_ug_2015_rank ~ Gender, data = AEDB.CEA, method = "wilcox.test")
ggpubr::ggboxplot(
  AEDB.CEA,
  x = "Gender",
  y = "MCP1_pg_ug_2015_rank",
  xlab = "Gender",
  ylab = "MCP1 plaque [pg/ug]\n(inverse-rank transformation)",
  color = "Gender",
  palette = c("#D5267B", "#1290D9"),
  add = "jitter"
) # + stat_compare_means(method = "wilcox.test")

# Plaque MCP1 (inverse-rank transformed) by age group, coloured by gender.
# Global test (not run):
# compare_means(MCP1_pg_ug_2015_rank ~ AgeGroup, data = AEDB.CEA, method = "kruskal.test")
ggpubr::ggboxplot(
  AEDB.CEA,
  x = "AgeGroup",
  y = "MCP1_pg_ug_2015_rank",
  xlab = "Age groups (years) per gender",
  ylab = "MCP1 plaque [pg/ug]\n(inverse-rank transformation)",
  color = "Gender",
  palette = c("#D5267B", "#1290D9"),
  add = "jitter"
) # + stat_compare_means(method = "kruskal.test")
NOT AVAILABLE YET
# Serum MCP1 (inverse-rank transformed) by gender.
# Global test (not run):
# compare_means(MCP1_rank ~ Gender, data = AEDB.CEA, method = "wilcox.test")
ggpubr::ggboxplot(
  AEDB.CEA,
  x = "Gender",
  y = "MCP1_rank",
  xlab = "Gender",
  ylab = "MCP1 serum [pg/mL]\n(inverse-rank transformation)",
  color = "Gender",
  palette = c("#D5267B", "#1290D9"),
  add = "jitter"
) # + stat_compare_means(method = "wilcox.test")

# Serum MCP1 (inverse-rank transformed) by age group, coloured by gender.
# Global test (not run):
# compare_means(MCP1_rank ~ AgeGroup, data = AEDB.CEA, method = "kruskal.test")
ggpubr::ggboxplot(
  AEDB.CEA,
  x = "AgeGroup",
  y = "MCP1_rank",
  xlab = "Age groups (years) per gender",
  ylab = "MCP1 serum [pg/mL]\n(inverse-rank transformation)",
  color = "Gender",
  palette = c("#D5267B", "#1290D9"),
  add = "jitter"
) # + stat_compare_means(method = "kruskal.test")
Similarly, but now for the raw data, shown as median ± interquartile range.
# ?ggpubr::ggboxplot()

# This section shows the raw plaque MCP1 levels with median and interquartile
# range. The chunk used to repeat the inverse-rank transformed plots of the
# previous section; it now plots the untransformed MCP1_pg_ug_2015 variable
# with the same "median_iqr" + jitter overlay as the raw serum plots.

# Raw plaque MCP1 by gender.
# Global test (not run):
# compare_means(MCP1_pg_ug_2015 ~ Gender, data = AEDB.CEA, method = "wilcox.test")
ggpubr::ggboxplot(AEDB.CEA,
                  x = "Gender",
                  y = "MCP1_pg_ug_2015",
                  xlab = "Gender",
                  ylab = "MCP1 plaque [pg/ug]",
                  color = "Gender",
                  palette = c("#D5267B", "#1290D9"),
                  add = c("median_iqr", "jitter")) #+
# stat_compare_means(method = "wilcox.test")

# Raw plaque MCP1 by age group, coloured by gender.
# Global test (not run):
# compare_means(MCP1_pg_ug_2015 ~ AgeGroup, data = AEDB.CEA, method = "kruskal.test")
ggpubr::ggboxplot(AEDB.CEA,
                  x = "AgeGroup",
                  y = "MCP1_pg_ug_2015",
                  xlab = "Age groups (years) per gender",
                  ylab = "MCP1 plaque [pg/ug]",
                  color = "Gender",
                  palette = c("#D5267B", "#1290D9"),
                  add = c("median_iqr", "jitter")) #+
# stat_compare_means(method = "kruskal.test")
NOT AVAILABLE YET
# Raw serum MCP1 by gender, with median/interquartile range and the
# individual observations overlaid.
ggpubr::ggboxplot(
  AEDB.CEA,
  x = "Gender",
  y = "MCP1",
  xlab = "Gender",
  ylab = "MCP1 serum [pg/mL]",
  color = "Gender",
  palette = c("#D5267B", "#1290D9"),
  # add = "median_iqr"
  add = c("median_iqr", "jitter")
)

# Raw serum MCP1 by age group, coloured by gender.
ggpubr::ggboxplot(
  AEDB.CEA,
  x = "AgeGroup",
  y = "MCP1",
  xlab = "Age groups (years) per gender",
  ylab = "MCP1 serum [pg/mL]",
  color = "Gender",
  palette = c("#D5267B", "#1290D9"),
  # add = "median_iqr"
  add = c("median_iqr", "jitter")
)
We will also make a nice correlation plot between serum and plaque MCP1 levels.
NOT AVAILABLE YET
# Scatter plot of raw plaque MCP1 (x) against raw MCP1 (y) with a regression
# line, its confidence band and the Spearman correlation coefficient printed
# at the given coordinates.
ggpubr::ggscatter(AEDB.CEA,
x = "MCP1_pg_ug_2015",
y = "MCP1",
xlab = "MCP1 plaque [pg/ug]",
ylab = "MCP1 serum [pg/mL]",
add = "reg.line", add.params = list(color = "#1290D9"),
conf.int = TRUE,
cor.coef = TRUE, cor.coeff.args = list(method = "spearman"), cor.coef.coord = c(8,750))
# The same plot for the inverse-rank transformed variables.
ggpubr::ggscatter(AEDB.CEA,
x = "MCP1_pg_ug_2015_rank",
y = "MCP1_rank",
xlab = "MCP1 plaque [pg/ug]\n(inverse-rank transformation)",
ylab = "MCP1 serum [pg/mL]\n(inverse-rank transformation)",
add = "reg.line", add.params = list(color = "#1290D9"),
conf.int = TRUE,
cor.coef = TRUE, cor.coeff.args = list(method = "spearman"), cor.coef.coord = c(2,3))
We will also make a nice correlation plot between the two experiments of plaque MCP1 levels.
# Rank-based inverse normal transformation of MCP1: ranks (missing values kept
# as NA) are shifted by 0.5, divided by the number of non-missing values and
# mapped to normal quantiles.
AEDB.CEA$MCP1_rank <- qnorm((rank(AEDB.CEA$MCP1, na.last = "keep") - 0.5) / sum(!is.na(AEDB.CEA$MCP1)))
# NOTE(review): here MCP1 is labelled as plaque "exp. no. 1", whereas other
# plots in this notebook label the same variable as serum MCP1 - confirm which
# label is correct.
# Raw values of the two measurements with regression line and Spearman
# correlation coefficient.
ggpubr::ggscatter(AEDB.CEA,
x = "MCP1",
y = "MCP1_pg_ug_2015",
xlab = "MCP1 plaque [pg/mL] (exp. no. 1)",
ylab = "MCP1 plaque [pg/ug] (exp. no. 2)",
add = "reg.line", add.params = list(color = "#1290D9"),
conf.int = TRUE,
cor.coef = TRUE, cor.coeff.args = list(method = "spearman"), cor.coef.coord = c(8,750))
# The same plot for the inverse-rank transformed variables.
ggpubr::ggscatter(AEDB.CEA,
x = "MCP1_rank",
y = "MCP1_pg_ug_2015_rank",
xlab = "MCP1 plaque [pg/mL]\n(INRT, exp. no. 1)",
ylab = "MCP1 plaque [pg/ug]\n(INRT, exp. no. 2)",
add = "reg.line", add.params = list(color = "#1290D9"),
conf.int = TRUE,
cor.coef = TRUE, cor.coeff.args = list(method = "spearman"), cor.coef.coord = c(2,3))
We want to create per-symptom figures.
library(dplyr)
# Cross tabulation of age group by symptom status.
table(AEDB.CEA$AgeGroup, AEDB.CEA$AsymptSympt2G)
Asymptomatic Symptomatic
<55 24 119
55-64 76 527
65-74 124 827
75-84 43 597
85+ 3 81
# Cross tabulation of gender by symptom status.
table(AEDB.CEA$Gender, AEDB.CEA$AsymptSympt2G)
Asymptomatic Symptomatic
female 64 674
male 206 1477
# Number of patients per symptom status.
table(AEDB.CEA$AsymptSympt2G)
Asymptomatic Symptomatic
270 2151
Now we can draw some graphs of serum/plaque MCP1 levels per symptom group.
# ?ggpubr::ggboxplot()

# The single pairwise comparison tested in both plots below.
my_comparisons <- list(c("Asymptomatic", "Symptomatic"))

# Plaque MCP1 (inverse-rank transformed) per symptom group, with a dot plot
# overlay and a Wilcoxon test for the comparison above.
p1 <- ggpubr::ggboxplot(
  AEDB.CEA,
  x = "AsymptSympt2G",
  y = "MCP1_pg_ug_2015_rank",
  title = "MCP1 plaque [pg/ug] levels per symptom",
  xlab = "Symptoms",
  ylab = "MCP1 plaque [pg/ug]\n inverse-rank transformation",
  color = "AsymptSympt2G",
  palette = c(uithof_color[16], uithof_color[23]),
  add = "dotplot",
  add.params = list(binwidth = 0.1, dotsize = 0.3)
) +
  stat_compare_means(comparisons = my_comparisons, method = "wilcox.test")
ggpar(p1, legend = "right", legend.title = "Symptoms")

# The same figure for serum MCP1; p1 is reused and removed afterwards.
p1 <- ggpubr::ggboxplot(
  AEDB.CEA,
  x = "AsymptSympt2G",
  y = "MCP1_rank",
  title = "MCP1 serum [pg/mL] levels per symptom",
  xlab = "Symptoms",
  ylab = "MCP1 serum [pg/mL]\n inverse-rank transformation",
  color = "AsymptSympt2G",
  palette = c(uithof_color[16], uithof_color[23]),
  add = "dotplot",
  add.params = list(binwidth = 0.1, dotsize = 0.3)
) +
  stat_compare_means(comparisons = my_comparisons, method = "wilcox.test")
ggpar(p1, legend = "right", legend.title = "Symptoms")

rm(p1)
We would also like to visualize the multivariable analyses results.
library(ggplot2)
library(openxlsx)
# Read back the MODEL1 and MODEL2 result files carrying today's date stamp;
# they are only found if those files were written on the same day.
model1_mcp1 <- read.xlsx(paste0(OUT_loc, "/", Today, ".AEDB.CEA.Bin.Uni.Protein.RANK.Symptoms.MODEL1.xlsx"))
model2_mcp1 <- read.xlsx(paste0(OUT_loc, "/", Today, ".AEDB.CEA.Bin.Multi.Protein.RANK.Symptoms.MODEL2.xlsx"))
# Tag each result set with its model type and stack them (MODEL1 rows first).
# NOTE(review): the plots further down label these two models as
# "Age, sex-adjusted" and "Age, sex, and adjusted for risk factors" - confirm
# that "univariate"/"multivariate" describe the same models.
model1_mcp1$model <- "univariate"
model2_mcp1$model <- "multivariate"
models_mcp1 <- rbind(model1_mcp1, model2_mcp1)
models_mcp1
NA
# Forest plot of the odds ratios (with 95% CI) of plaque MCP-1 for the two
# models. The values are matched to the group labels by position, i.e. in the
# row order of models_mcp1.
dat <- data.frame(group = factor(c("Age, sex-adjusted", "Age, sex, and adjusted for risk factors"),
                                 levels = c("Age, sex, and adjusted for risk factors", "Age, sex-adjusted")),
                  cen = models_mcp1$OR[models_mcp1$Predictor == "MCP1_pg_ug_2015_rank"],
                  low = models_mcp1$low95CI[models_mcp1$Predictor == "MCP1_pg_ug_2015_rank"],
                  high = models_mcp1$up95CI[models_mcp1$Predictor == "MCP1_pg_ug_2015_rank"])

fp <- ggplot(data = dat, aes(x = group, y = cen, ymin = low, ymax = high)) +
  geom_pointrange() +
  geom_hline(yintercept = 1, lty = 2) +  # add a dotted line at x=1 after flip
  coord_flip() +  # flip coordinates (puts labels on y axis)
  xlab("Model") + ylab("OR (95% CI) for symptomatic plaques") +
  ggtitle("Plaque MCP-1 levels (1 SD increment)") +
  # theme_minimal() is a complete theme and replaces every theme setting added
  # before it, so it has to come first for the text size to take effect.
  theme_minimal() +  # use a white background
  theme(text = element_text(size = 14))
print(fp)
rm(fp)
# Forest plot of the odds ratios (with 95% CI) of serum MCP-1 for the two
# models. The values are matched to the group labels by position, i.e. in the
# row order of models_mcp1.
dat <- data.frame(group = factor(c("Age, sex-adjusted", "Age, sex, and adjusted for risk factors"),
                                 levels = c("Age, sex, and adjusted for risk factors", "Age, sex-adjusted")),
                  cen = models_mcp1$OR[models_mcp1$Predictor == "MCP1_rank"],
                  low = models_mcp1$low95CI[models_mcp1$Predictor == "MCP1_rank"],
                  high = models_mcp1$up95CI[models_mcp1$Predictor == "MCP1_rank"])

fp <- ggplot(data = dat, aes(x = group, y = cen, ymin = low, ymax = high)) +
  geom_pointrange() +
  geom_hline(yintercept = 1, lty = 2) +  # add a dotted line at x=1 after flip
  coord_flip() +  # flip coordinates (puts labels on y axis)
  xlab("Model") + ylab("OR (95% CI) for symptomatic plaques") +
  ggtitle("Serum MCP-1 levels (1 SD increment)") +
  # theme_minimal() is a complete theme and replaces every theme setting added
  # before it, so it has to come first for the text size to take effect.
  theme_minimal() +  # use a white background
  theme(text = element_text(size = 14))
print(fp)
rm(fp)
We will plot the correlations of other cytokine plaque levels to the MCP1 plaque levels. These include:
In addition we will look at three metalloproteinases which were measured using an activity assay.
The proteins were measured using FACS and LUMINEX. Given the different platforms used (FACS vs. LUMINEX), we will inverse rank-normalize these variables as well to scale them to the same scale as the MCP1 plaque levels.
# Names of the cytokine and metalloproteinase columns that are analysed.
cytokines <- c("IL2", "IL4", "IL5", "IL6", "IL8", "IL9", "IL10", "IL12", "IL13", "IL21",
               "INFG", "TNFA", "MIF", "MCP1", "MIP1a", "RANTES", "MIG", "IP10", "Eotaxin1",
               "TARC", "PARC", "MDC", "OPG", "sICAM1", "VEGFA", "TGFB")
metalloproteinases <- c("MMP2", "MMP8", "MMP9")

# fix names: the data set carries the misspelled column name "VEFGA"
names(AEDB.CEA)[names(AEDB.CEA) == "VEFGA"] <- "VEGFA"

proteins_of_interest <- c(cytokines, metalloproteinases)
# paste0() is vectorised, so the "_rank" names can be built in one call
proteins_of_interest_rank <- paste0(proteins_of_interest, "_rank")

# make variables numerics()
# Converted column by column in base R; this replaces mutate_each()/funs(),
# which are deprecated in dplyr and produced warnings.
AEDB.CEA[proteins_of_interest] <- lapply(AEDB.CEA[proteins_of_interest], as.numeric)
funs() is soft deprecated as of dplyr 0.8.0
Please use a list of either functions or lambdas:
# Simple named list:
list(mean = mean, median = median)
# Auto named with `tibble::lst()`:
tibble::lst(mean, median)
# Using lambdas
list(~ mean(., trim = .2), ~ median(., na.rm = TRUE))
[90mThis warning is displayed once per session.[39mNote: Using an external vector in selections is ambiguous.
[34mℹ[39m Use `all_of(proteins_of_interest)` instead of `proteins_of_interest` to silence this message.
[34mℹ[39m See <https://tidyselect.r-lib.org/reference/faq-external-vector.html>.
[90mThis message is displayed once per session.[39m
# For every protein of interest: treat values of exactly 0 as missing, report
# its mean and sample size, and add a rank-based inverse normal transformation
# of the protein as the column <protein>_rank.
# seq_along() is used instead of 1:length() so that an empty vector results in
# zero iterations.
for (PROTEIN in seq_along(proteins_of_interest)) {
  # UCORBIOGSAqc$Z <- NULL
  var.temp.rank <- proteins_of_interest_rank[PROTEIN]
  var.temp <- proteins_of_interest[PROTEIN]

  cat(paste0("\nSelecting ", var.temp, " and standardising: ", var.temp.rank,".\n"))
  cat(paste0("* changing ", var.temp, " to numeric.\n"))
  # AEDB.CEA <- AEDB.CEA %>% mutate(AEDB.CEA[,var.temp] == replace(AEDB.CEA[,var.temp], AEDB.CEA[,var.temp]==0, NA))
  # which() ignores NA comparisons, so only true zeros are set to missing
  AEDB.CEA[[var.temp]][which(AEDB.CEA[[var.temp]] == 0)] <- NA

  # Report the actual mean of the observed values; this used to print
  # mean(!is.na(x)), i.e. the proportion of non-missing values.
  cat(paste0("* standardising ", var.temp,
             " (mean: ",round(mean(AEDB.CEA[[var.temp]], na.rm = TRUE), digits = 6),
             ", n = ",sum(!is.na(AEDB.CEA[[var.temp]])),").\n"))

  # With a single selected column and a named function list, mutate_at() names
  # the new column after the function ("RANK"). all_of() makes the selection
  # by an external character vector explicit.
  AEDB.CEA <- AEDB.CEA %>%
    mutate_at(vars(tidyselect::all_of(var.temp)),
              # list(Z = ~ (AEDB.CEA[,var.temp] - mean(AEDB.CEA[,var.temp], na.rm = TRUE))/sd(AEDB.CEA[,var.temp], na.rm = TRUE))
              list(RANK = ~ qnorm((rank(., na.last = "keep") - 0.5) / sum(!is.na(.))))
    )
  # str(UCORBIOGSAqc$Z)

  cat(paste0("* renaming RANK to ", var.temp.rank,".\n"))
  # drop any older version of the rank column before renaming the new one
  AEDB.CEA[[var.temp.rank]] <- NULL
  names(AEDB.CEA)[names(AEDB.CEA) == "RANK"] <- var.temp.rank
}
Selecting IL2 and standardising: IL2_rank.
* changing IL2 to numeric.
* standardising IL2 (mean: 0.180091, n = 436).
Note: Using an external vector in selections is ambiguous.
[34mℹ[39m Use `all_of(var.temp)` instead of `var.temp` to silence this message.
[34mℹ[39m See <https://tidyselect.r-lib.org/reference/faq-external-vector.html>.
[90mThis message is displayed once per session.[39m
* renaming RANK to IL2_rank.
Selecting IL4 and standardising: IL4_rank.
* changing IL4 to numeric.
* standardising IL4 (mean: 0.167699, n = 406).
* renaming RANK to IL4_rank.
Selecting IL5 and standardising: IL5_rank.
* changing IL5 to numeric.
* standardising IL5 (mean: 0.178439, n = 432).
* renaming RANK to IL5_rank.
Selecting IL6 and standardising: IL6_rank.
* changing IL6 to numeric.
* standardising IL6 (mean: 0.188352, n = 456).
* renaming RANK to IL6_rank.
Selecting IL8 and standardising: IL8_rank.
* changing IL8 to numeric.
* standardising IL8 (mean: 0.182156, n = 441).
* renaming RANK to IL8_rank.
Selecting IL9 and standardising: IL9_rank.
* changing IL9 to numeric.
* standardising IL9 (mean: 0.206526, n = 500).
* renaming RANK to IL9_rank.
Selecting IL10 and standardising: IL10_rank.
* changing IL10 to numeric.
* standardising IL10 (mean: 0.159025, n = 385).
* renaming RANK to IL10_rank.
Selecting IL12 and standardising: IL12_rank.
* changing IL12 to numeric.
* standardising IL12 (mean: 0.168112, n = 407).
* renaming RANK to IL12_rank.
Selecting IL13 and standardising: IL13_rank.
* changing IL13 to numeric.
* standardising IL13 (mean: 0.232962, n = 564).
* renaming RANK to IL13_rank.
Selecting IL21 and standardising: IL21_rank.
* changing IL21 to numeric.
* standardising IL21 (mean: 0.233375, n = 565).
* renaming RANK to IL21_rank.
Selecting INFG and standardising: INFG_rank.
* changing INFG to numeric.
* standardising INFG (mean: 0.179265, n = 434).
* renaming RANK to INFG_rank.
Selecting TNFA and standardising: TNFA_rank.
* changing TNFA to numeric.
* standardising TNFA (mean: 0.163569, n = 396).
* renaming RANK to TNFA_rank.
Selecting MIF and standardising: MIF_rank.
* changing MIF to numeric.
* standardising MIF (mean: 0.233375, n = 565).
* renaming RANK to MIF_rank.
Selecting MCP1 and standardising: MCP1_rank.
* changing MCP1 to numeric.
* standardising MCP1 (mean: 0.229657, n = 556).
* renaming RANK to MCP1_rank.
Selecting MIP1a and standardising: MIP1a_rank.
* changing MIP1a to numeric.
* standardising MIP1a (mean: 0.211896, n = 513).
* renaming RANK to MIP1a_rank.
Selecting RANTES and standardising: RANTES_rank.
* changing RANTES to numeric.
* standardising RANTES (mean: 0.228831, n = 554).
* renaming RANK to RANTES_rank.
Selecting MIG and standardising: MIG_rank.
* changing MIG to numeric.
* standardising MIG (mean: 0.227179, n = 550).
* renaming RANK to MIG_rank.
Selecting IP10 and standardising: IP10_rank.
* changing IP10 to numeric.
* standardising IP10 (mean: 0.206113, n = 499).
* renaming RANK to IP10_rank.
Selecting Eotaxin1 and standardising: Eotaxin1_rank.
* changing Eotaxin1 to numeric.
* standardising Eotaxin1 (mean: 0.233375, n = 565).
* renaming RANK to Eotaxin1_rank.
Selecting TARC and standardising: TARC_rank.
* changing TARC to numeric.
* standardising TARC (mean: 0.200743, n = 486).
* renaming RANK to TARC_rank.
Selecting PARC and standardising: PARC_rank.
* changing PARC to numeric.
* standardising PARC (mean: 0.233375, n = 565).
* renaming RANK to PARC_rank.
Selecting MDC and standardising: MDC_rank.
* changing MDC to numeric.
* standardising MDC (mean: 0.209831, n = 508).
* renaming RANK to MDC_rank.
Selecting OPG and standardising: OPG_rank.
* changing OPG to numeric.
* standardising OPG (mean: 0.232962, n = 564).
* renaming RANK to OPG_rank.
Selecting sICAM1 and standardising: sICAM1_rank.
* changing sICAM1 to numeric.
* standardising sICAM1 (mean: 0.233375, n = 565).
* renaming RANK to sICAM1_rank.
Selecting VEGFA and standardising: VEGFA_rank.
* changing VEGFA to numeric.
* standardising VEGFA (mean: 0.20157, n = 488).
* renaming RANK to VEGFA_rank.
Selecting TGFB and standardising: TGFB_rank.
* changing TGFB to numeric.
* standardising TGFB (mean: 0.23007, n = 557).
* renaming RANK to TGFB_rank.
Selecting MMP2 and standardising: MMP2_rank.
* changing MMP2 to numeric.
* standardising MMP2 (mean: 0.232135, n = 562).
* renaming RANK to MMP2_rank.
Selecting MMP8 and standardising: MMP8_rank.
* changing MMP8 to numeric.
* standardising MMP8 (mean: 0.232135, n = 562).
* renaming RANK to MMP8_rank.
Selecting MMP9 and standardising: MMP9_rank.
* changing MMP9 to numeric.
* standardising MMP9 (mean: 0.231722, n = 561).
* renaming RANK to MMP9_rank.
# rm(var.temp, var.temp.rank)
We will just visualize these transformations.
# Column names to plot: the MCP1_pg_ug_2015 measurement is put in front of the
# existing protein vectors, once for the raw columns and once for the
# corresponding *_rank columns.
proteins_of_interest_mcp1 <- c("MCP1_pg_ug_2015", proteins_of_interest)
proteins_of_interest_rank_mcp1 <- c("MCP1_pg_ug_2015_rank", proteins_of_interest_rank)

# One histogram per raw protein column, filled by Gender, with the mean marked.
# Optional extras left disabled: y = "..count..", rug = TRUE,
# add.params = list(color = "black", linetype = 2).
for (PROTEIN in proteins_of_interest_mcp1) {
  cat("Plotting protein ", PROTEIN, ".\n", sep = "")
  p1 <- ggpubr::gghistogram(
    data = AEDB.CEA,
    x = PROTEIN,
    title = paste0(PROTEIN, " plaque levels"),
    xlab = "",
    fill = "Gender",
    palette = c("#1290D9", "#DB003F"),
    color = "white",
    add = "mean",
    ggtheme = theme_minimal()
  )
  # Explicit print() is required for ggplot objects created inside a loop.
  print(p1)
}
Plotting protein MCP1_pg_ug_2015.
Using `bins = 30` by default. Pick better value with the argument `bins`.
Plotting protein IL2.
Plotting protein IL4.
Plotting protein IL5.
Plotting protein IL6.
Plotting protein IL8.
Plotting protein IL9.
Plotting protein IL10.
Plotting protein IL12.
Plotting protein IL13.
Plotting protein IL21.
Plotting protein INFG.
Plotting protein TNFA.
Plotting protein MIF.
Plotting protein MCP1.
Plotting protein MIP1a.
Plotting protein RANTES.
Plotting protein MIG.
Plotting protein IP10.
Plotting protein Eotaxin1.
Plotting protein TARC.
Plotting protein PARC.
Plotting protein MDC.
Plotting protein OPG.
Plotting protein sICAM1.
Plotting protein VEGFA.
Plotting protein TGFB.
Plotting protein MMP2.
Plotting protein MMP8.
Plotting protein MMP9.
# One histogram per rank-transformed protein column, filled by Gender, with the
# mean marked.
# Optional extras left disabled: y = "..count..", rug = TRUE,
# add.params = list(color = "black", linetype = 2).
for (PROTEIN in proteins_of_interest_rank_mcp1) {
  cat("Plotting protein ", PROTEIN, ".\n", sep = "")
  p1 <- ggpubr::gghistogram(
    data = AEDB.CEA,
    x = PROTEIN,
    title = paste0(PROTEIN, " plaque levels"),
    xlab = "inverse-normal transformation",
    fill = "Gender",
    palette = c("#1290D9", "#DB003F"),
    color = "white",
    add = "mean",
    ggtheme = theme_minimal()
  )
  # Explicit print() is required for ggplot objects created inside a loop.
  print(p1)
}
Plotting protein MCP1_pg_ug_2015_rank.
Using `bins = 30` by default. Pick better value with the argument `bins`.
Plotting protein IL2_rank.
Plotting protein IL4_rank.
Plotting protein IL5_rank.
Plotting protein IL6_rank.
Plotting protein IL8_rank.
Plotting protein IL9_rank.
Plotting protein IL10_rank.
Plotting protein IL12_rank.
Plotting protein IL13_rank.
Plotting protein IL21_rank.
Plotting protein INFG_rank.
Plotting protein TNFA_rank.
Plotting protein MIF_rank.
Plotting protein MCP1_rank.
Plotting protein MIP1a_rank.
Plotting protein RANTES_rank.
Plotting protein MIG_rank.
Plotting protein IP10_rank.
Plotting protein Eotaxin1_rank.
Plotting protein TARC_rank.
Plotting protein PARC_rank.
Plotting protein MDC_rank.
Plotting protein OPG_rank.
Plotting protein sICAM1_rank.
Plotting protein VEGFA_rank.
Plotting protein TGFB_rank.
Plotting protein MMP2_rank.
Plotting protein MMP8_rank.
Plotting protein MMP9_rank.
NA
# Installation of ggcorrplot()
# --------------------------------
# Install ggcorrplot from GitHub only when it is not yet available, so that a
# normal run needs no network access and does not silently pick up a newer
# upstream build. requireNamespace() is used for the availability checks
# because it tests for a package without attaching it.
if (!requireNamespace("ggcorrplot", quietly = TRUE)) {
  if (!requireNamespace("devtools", quietly = TRUE)) {
    install.packages("devtools")
  }
  devtools::install_github("kassambara/ggcorrplot")
}
Skipping install of 'ggcorrplot' from a github remote, the SHA1 (c46b4cce) has not changed since last install.
Use `force = TRUE` to force installation
library(ggcorrplot)
# Creating matrix - inverse-rank transformation
# --------------------------------
# Keep only the rank-transformed biomarker columns and convert to a matrix.
AEDB.CEA.temp <- subset(AEDB.CEA,
                        select = c(proteins_of_interest_rank_mcp1)
)
# str(AEDB.CEA.temp)
AEDB.CEA.matrix.RANK <- as.matrix(AEDB.CEA.temp)
rm(AEDB.CEA.temp)

# Pairwise Spearman correlations, rounded to 3 decimals. With
# use = "pairwise.complete.obs" each coefficient is computed from all rows that
# are complete for that particular pair of variables.
corr_biomarkers.rank <- round(cor(AEDB.CEA.matrix.RANK,
                                  use = "pairwise.complete.obs",
                                  method = "spearman"), 3)
# corr_biomarkers.rank

# Display labels keyed by column name. Looking labels up by name (instead of
# assigning them by position) guarantees that a change in the order or content
# of `proteins_of_interest_rank` cannot silently mislabel the matrix.
# "exp1"/"exp2" distinguish the two plaque MCP1 measurements (`MCP1` and
# `MCP1_pg_ug_2015`).
biomarker_labels <- c(
  MCP1_pg_ug_2015_rank = "MCP1 (L, exp2)",
  IL2_rank = "IL2", IL4_rank = "IL4", IL5_rank = "IL5", IL6_rank = "IL6",
  IL8_rank = "IL8", IL9_rank = "IL9", IL10_rank = "IL10", IL12_rank = "IL12",
  IL13_rank = "IL13 (L)", IL21_rank = "IL21 (L)",
  INFG_rank = "INFG", TNFA_rank = "TNFA", MIF_rank = "MIF (L)",
  MCP1_rank = "MCP1 (L, exp1)", MIP1a_rank = "MIP1a (L)",
  RANTES_rank = "RANTES (L)", MIG_rank = "MIG (L)", IP10_rank = "IP10 (L)",
  Eotaxin1_rank = "Eotaxin1 (L)", TARC_rank = "TARC (L)",
  PARC_rank = "PARC (L)", MDC_rank = "MDC (L)",
  OPG_rank = "OPG (L)", sICAM1_rank = "sICAM1 (L)", VEGFA_rank = "VEGFA (E)",
  TGFB_rank = "TGFB (E)", MMP2_rank = "MMP2 (r)", MMP8_rank = "MMP8 (r)",
  MMP9_rank = "MMP9 (r)"
)
rename_proteins_of_interest_mcp1 <- unname(
  biomarker_labels[colnames(corr_biomarkers.rank)]
)
# Fail loudly when a column has no label rather than plotting an NA axis label.
if (anyNA(rename_proteins_of_interest_mcp1)) {
  stop("No display label defined for: ",
       paste(colnames(corr_biomarkers.rank)[is.na(rename_proteins_of_interest_mcp1)],
             collapse = ", "),
       call. = FALSE)
}
colnames(corr_biomarkers.rank) <- rename_proteins_of_interest_mcp1
rownames(corr_biomarkers.rank) <- rename_proteins_of_interest_mcp1

# Matrix of correlation p-values for the same columns (keeps the original
# column names; only needed when `p.mat` is passed to ggcorrplot()).
corr_biomarkers_p.rank <- ggcorrplot::cor_pmat(AEDB.CEA.matrix.RANK, use = "pairwise.complete.obs", method = "spearman")
# Correlation heatmap of the biomarkers
# --------------------------------
# Draws the lower triangle (diagonal included) as coloured squares, in the
# order of the input matrix (no hierarchical clustering).
# Coefficient labels are switched off here; set lab = TRUE to print them.
# Disabled options kept for reference:
#   xlab = c("MCP1")
#   p.mat = corr_biomarkers_p.rank, sig.level = 0.05
ggcorrplot(
  corr_biomarkers.rank,
  title = "Cross biomarker correlations",
  type = "lower",
  method = "square",
  show.diag = TRUE,
  hc.order = FALSE,
  colors = c("#1290D9", "#FFFFFF", "#E55738"),
  outline.color = "#FFFFFF",
  lab = FALSE,
  digits = 3,
  tl.cex = 6,
  show.legend = TRUE,
  legend.title = bquote("Spearman's" ~ italic(rho)),
  ggtheme = ggplot2::theme_minimal
)
Version: v1.0.8
Last update: 2020-06-25
Written by: Sander W. van der Laan (s.w.vanderlaan-2[at]umcutrecht.nl).
Description: Script to analyse MCP1 from the Athero-Express Biobank Study.
Minimum requirements: R version 3.5.2 (2018-12-20) -- 'Eggshell Igloo', macOS Mojave (10.14.2).
**MoSCoW To-Do List**
The things we Must, Should, Could, and Would have given the time we have.
_M_
* analysis on serum
_S_
* prettify forest plot
_C_
_W_
**Changes log**
* v1.0.8 Fixed error in MCP1 serum analysis. It turns out the `MCP1` and `MCP1_pg_ug_2015` variables are _both_ measured in plaque, in two separate experiments, exp. no. 1 and exp. no. 2, respectively.
* v1.0.7 Fixed the per Age-group MCP1 Box plots. Added correlations with other cytokines in plaques.
* v1.0.6 Only analyses and figures that end up in the final manuscript.
* v1.0.5 Update with 30- and 90-days survival.
* v1.0.4 Updated with Cox-regressions.
* v1.0.3 Included more models.
* v1.0.2 Bugs fixed.
* v1.0.1 Extended with linear and logistic regressions
* v1.0.0 Initial version
# Print the R version, platform, locale, and attached/loaded package versions
# of the current session, as a record of the environment used for this run.
sessionInfo()
R version 3.6.3 (2020-02-29)
Platform: x86_64-apple-darwin19.4.0 (64-bit)
Running under: macOS Catalina 10.15.5
Matrix products: default
BLAS: /System/Library/Frameworks/Accelerate.framework/Versions/A/Frameworks/vecLib.framework/Versions/A/libBLAS.dylib
LAPACK: /usr/local/Cellar/openblas/0.3.10/lib/libopenblasp-r0.3.10.dylib
locale:
[1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
attached base packages:
[1] tools stats graphics grDevices utils datasets methods base
other attached packages:
[1] openxlsx_4.1.5 GGally_1.5.0 PerformanceAnalytics_2.0.4 xts_0.12-0 zoo_1.8-8
[6] ggpubr_0.3.0 tableone_0.11.1 labelled_2.4.0 sjPlot_2.8.4 sjlabelled_1.1.5
[11] haven_2.3.0 MASS_7.3-51.6 DT_0.13 knitr_1.28 forcats_0.5.0
[16] stringr_1.4.0 purrr_0.3.4 tibble_3.0.1 tidyverse_1.3.0 data.table_1.12.8
[21] naniar_0.5.1 tidyr_1.1.0 dplyr_0.8.5 optparse_1.6.6 readr_1.3.1
[26] ggcorrplot_0.1.3.999 ggplot2_3.3.0 devtools_2.3.0 usethis_1.6.1
loaded via a namespace (and not attached):
[1] readxl_1.3.1 backports_1.1.7 plyr_1.8.6 splines_3.6.3 crosstalk_1.1.0.1 TH.data_1.0-10 digest_0.6.25
[8] htmltools_0.4.0 fansi_0.4.1 magrittr_1.5 memoise_1.1.0 remotes_2.1.1 modelr_0.1.8 sandwich_2.5-1
[15] prettyunits_1.1.1 colorspace_1.4-1 rvest_0.3.5 mitools_2.4 xfun_0.14 callr_3.4.3 crayon_1.3.4
[22] jsonlite_1.6.1 lme4_1.1-23 survival_3.1-12 glue_1.4.1 gtable_0.3.0 emmeans_1.4.7 sjstats_0.18.0
[29] sjmisc_2.8.4 car_3.0-8 pkgbuild_1.0.8 abind_1.4-5 scales_1.1.1 mvtnorm_1.1-0 DBI_1.1.0
[36] rstatix_0.5.0.999 ggeffects_0.14.3 Rcpp_1.0.4.6 xtable_1.8-4 performance_0.4.6 foreign_0.8-75 survey_4.0
[43] htmlwidgets_1.5.1 httr_1.4.1 getopt_1.20.3 RColorBrewer_1.1-2 ellipsis_0.3.1 reshape_0.8.8 pkgconfig_2.0.3
[50] farver_2.0.3 dbplyr_1.4.3 tidyselect_1.1.0 labeling_0.3 rlang_0.4.6 reshape2_1.4.4 effectsize_0.3.1
[57] munsell_0.5.0 cellranger_1.1.0 cli_2.0.2 generics_0.0.2 broom_0.5.6 evaluate_0.14 yaml_2.2.1
[64] processx_3.4.2 fs_1.4.1 zip_2.0.4 packrat_0.5.0 visdat_0.5.3 nlme_3.1-148 xml2_1.3.2
[71] compiler_3.6.3 rstudioapi_0.11 curl_4.3 testthat_2.3.2 ggsignif_0.6.0 reprex_0.3.0 statmod_1.4.34
[78] stringi_1.4.6 ps_1.3.3 parameters_0.7.0 desc_1.2.0 lattice_0.20-41 Matrix_1.2-18 nloptr_1.2.2.1
[85] vctrs_0.3.0 pillar_1.4.4 lifecycle_0.2.0 estimability_1.3 insight_0.8.4 R6_2.4.1 rio_0.5.16
[92] sessioninfo_1.1.1 codetools_0.2-16 boot_1.3-25 assertthat_0.2.1 pkgload_1.0.2 rprojroot_1.3-2 withr_2.2.0
[99] multcomp_1.4-13 mgcv_1.8-31 bayestestR_0.6.0 hms_0.5.3 quadprog_1.5-8 grid_3.6.3 coda_0.19-3
[106] minqa_1.2.4 rmarkdown_2.1 carData_3.0-4 base64enc_0.1-3 lubridate_1.7.8
# Save the whole workspace to "<Today>.<PROJECTNAME>.sample_selection.RData"
# inside PROJECT_loc (Today is presumably the datestamp created at the top of
# the script).
save.image(
  file = file.path(
    PROJECT_loc,
    paste0(Today, ".", PROJECTNAME, ".sample_selection.RData")
  )
)
| © 1979-2020 Sander W. van der Laan | s.w.vanderlaan-2[at]umcutrecht.nl | swvanderlaan.github.io. |